diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml index a72af5c9a82..e962661dd4f 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml @@ -252,6 +252,7 @@ spec: name: type: string enum: + - original - fashion-mnist range: type: object @@ -262,6 +263,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml index bbdfd8dd1d8..6cf2d16630d 100644 --- a/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml +++ b/charts/vald-benchmark-operator/crds/valdbenchmarkscenario.yaml @@ -84,6 +84,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/charts/vald-benchmark-operator/job-values.schema.json b/charts/vald-benchmark-operator/job-values.schema.json index 54ebd9b6a7d..20b50910b6c 100644 --- a/charts/vald-benchmark-operator/job-values.schema.json +++ b/charts/vald-benchmark-operator/job-values.schema.json @@ -305,7 +305,7 @@ "name": { "type": "string", "description": "the name of dataset", - "enum": ["fashion-mnist"] + "enum": ["original", "fashion-mnist"] }, "range": { "type": "object", @@ -321,7 +321,12 @@ "description": "start index number", "minimum": 1 } - } + }, + "required": ["start", "end"] + }, + "url": { + "type": "string", + "description": "the dataset url which is used for executing benchmark job with user defined hdf5 file" } }, "required": ["name", "indexes", "group", "range"] diff --git a/charts/vald-benchmark-operator/scenario-values.schema.json b/charts/vald-benchmark-operator/scenario-values.schema.json index e3e549e69d6..8c6d1172765 100644 --- a/charts/vald-benchmark-operator/scenario-values.schema.json +++ b/charts/vald-benchmark-operator/scenario-values.schema.json @@ -36,7 +36,12 @@ "description": "start index number", "minimum": 1 } - } + }, + "required": ["start", "end"] + }, + "url": { + "type": "string", + "description": "the dataset url which is used for executing benchmark job with user defined hdf5 file" } }, "required": ["name", "indexes", "group", "range"] diff --git a/charts/vald-benchmark-operator/schemas/job-values.yaml b/charts/vald-benchmark-operator/schemas/job-values.yaml index 89a3b1c8e84..835cb239dc1 100644 --- a/charts/vald-benchmark-operator/schemas/job-values.yaml +++ b/charts/vald-benchmark-operator/schemas/job-values.yaml @@ -27,7 +27,7 @@ target: # @schema {"name": "dataset", "type": "object", "required": ["name", "indexes", "group", "range"]} # dataset -- dataset information dataset: - # @schema {"name": "dataset.name", "type": "string", "enum": ["fashion-mnist"] } + # @schema {"name": "dataset.name", "type": "string", "enum": ["original", "fashion-mnist"] } # dataset.name -- the name of dataset name: "fashion-mnist" # @schema {"name": "dataset.indexes", "type": "integer", "minimum": 0} @@ -36,7 +36,7 @@ dataset: # @schema {"name": "dataset.group", "type": "string", "minLength": 1} # dataset.group -- the hdf5 group name of dataset group: "test" - # @schema {"name": "dataset.range", "type": "object", "range": ["start", "port"]} + # @schema {"name": "dataset.range", "type": "object", "required": ["start", "end"]} # dataset.range -- the data range of indexes range: # @schema {"name": "dataset.range.start", "type": "integer", "minimum": 1} @@ -45,6 +45,9 @@ dataset: # @schema {"name": "dataset.range.end", "type": "integer", "minimum": 1} # dataset.range.end -- end index number end: 1000 + # @schema {"name": "dataset.url", "type": "string"} + # dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file + url: "" # @schema {"name": "dimension", "type": "integer", "minimum": 1} # dimension -- vector dimension dimension: 784 diff --git a/charts/vald-benchmark-operator/schemas/scenario-values.yaml b/charts/vald-benchmark-operator/schemas/scenario-values.yaml index 66a32ea707a..1cb0c6c68df 100644 --- a/charts/vald-benchmark-operator/schemas/scenario-values.yaml +++ b/charts/vald-benchmark-operator/schemas/scenario-values.yaml @@ -37,7 +37,7 @@ dataset: # @schema {"name": "dataset.group", "type": "string", "minLength": 1} # dataset.group -- the hdf5 group name of dataset group: "test" - # @schema {"name": "dataset.range", "type": "object", "range": ["start", "port"]} + # @schema {"name": "dataset.range", "type": "object", "required": ["start", "end"]} # dataset.range -- the data range of indexes range: # @schema {"name": "dataset.range.start", "type": "integer", "minimum": 1} @@ -46,6 +46,9 @@ dataset: # @schema {"name": "dataset.range.end", "type": "integer", "minimum": 1} # dataset.range.end -- end index number end: 1000 + # @schema {"name": "dataset.url", "type": "string"} + # dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file + url: "" # @schema {"name": "jobs", "type": "array", "items": {"type": "object"}} jobs: diff --git a/internal/config/benchmark.go b/internal/config/benchmark.go index 14257561531..aa3cf1c687d 100644 --- a/internal/config/benchmark.go +++ b/internal/config/benchmark.go @@ -62,10 +62,13 @@ type BenchmarkDataset struct { Group string `json:"group,omitempty"` Indexes int `json:"indexes,omitempty"` Range *BenchmarkDatasetRange `json:"range,omitempty"` + URL string `json:"url,omitempty"` } func (d *BenchmarkDataset) Bind() *BenchmarkDataset { d.Name = GetActualValue(d.Name) + d.Group = GetActualValue(d.Group) + d.URL = GetActualValue(d.URL) return d } diff --git a/internal/test/data/hdf5/hdf5.go b/internal/test/data/hdf5/hdf5.go index f178a52c2e1..8fedfd76b1b 100644 --- a/internal/test/data/hdf5/hdf5.go +++ b/internal/test/data/hdf5/hdf5.go @@ -28,7 +28,7 @@ import ( ) type Data interface { - Download() error + Download(url string) error Read() error GetName() DatasetName GetPath() string @@ -41,11 +41,14 @@ type Data interface { type DatasetName int const ( - FashionMNIST784Euclidean DatasetName = iota + Original DatasetName = iota + FashionMNIST784Euclidean ) func (d DatasetName) String() string { switch d { + case Original: + return "original" case FashionMNIST784Euclidean: return "fashion-mnist-784-euc" default: @@ -109,8 +112,10 @@ func New(opts ...Option) (Data, error) { // Get downloads the hdf5 file. // https://github.com/erikbern/ann-benchmarks/#data-sets -func (d *data) Download() error { +func (d *data) Download(url string) error { switch d.name { + case Original: + return downloadFile(url, d.path) case FashionMNIST784Euclidean: return downloadFile(FashionMNIST784EuclideanUrl.String(), d.path) default: diff --git a/internal/test/data/hdf5/hdf5_test.go b/internal/test/data/hdf5/hdf5_test.go index d308048781d..509de3c6e54 100644 --- a/internal/test/data/hdf5/hdf5_test.go +++ b/internal/test/data/hdf5/hdf5_test.go @@ -353,12 +353,16 @@ func Test_data_Download(t *testing.T) { test [][]float32 neighbors [][]int } + type args struct { + url string + } type want struct { err error } type test struct { name string fields fields + args args want want checkFunc func(want, error) error beforeFunc func(*testing.T) @@ -440,7 +444,7 @@ func Test_data_Download(t *testing.T) { neighbors: test.fields.neighbors, } - err := d.Download() + err := d.Download(test.args.url) if err := checkFunc(test.want, err); err != nil { tt.Errorf("error = %v", err) } diff --git a/internal/test/data/hdf5/option.go b/internal/test/data/hdf5/option.go index 6fee9d0ae37..4d02b281f27 100644 --- a/internal/test/data/hdf5/option.go +++ b/internal/test/data/hdf5/option.go @@ -31,6 +31,8 @@ var defaultOptions = []Option{ func WithNameByString(n string) Option { var name DatasetName switch n { + case Original.String(): + name = Original case FashionMNIST784Euclidean.String(): name = FashionMNIST784Euclidean } @@ -40,6 +42,8 @@ func WithNameByString(n string) Option { func WithName(dn DatasetName) Option { return func(d *data) error { switch dn { + case Original: + d.name = dn case FashionMNIST784Euclidean: d.name = dn default: diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml index a72af5c9a82..e962661dd4f 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml @@ -252,6 +252,7 @@ spec: name: type: string enum: + - original - fashion-mnist range: type: object @@ -262,6 +263,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml index bbdfd8dd1d8..6cf2d16630d 100644 --- a/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml +++ b/k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml @@ -84,6 +84,11 @@ spec: start: type: integer minimum: 1 + required: + - start + - end + url: + type: string required: - name - indexes diff --git a/pkg/tools/benchmark/job/service/job.go b/pkg/tools/benchmark/job/service/job.go index 2736ad898c9..887fb31ca16 100644 --- a/pkg/tools/benchmark/job/service/job.go +++ b/pkg/tools/benchmark/job/service/job.go @@ -191,7 +191,7 @@ func New(opts ...Option) (Job, error) { func (j *job) PreStart(ctx context.Context) error { log.Infof("[benchmark job] start download dataset of %s", j.hdf5.GetName().String()) - if err := j.hdf5.Download(); err != nil { + if err := j.hdf5.Download(j.dataset.URL); err != nil { return err } log.Infof("[benchmark job] success download dataset of %s", j.hdf5.GetName().String()) diff --git a/pkg/tools/benchmark/job/service/option.go b/pkg/tools/benchmark/job/service/option.go index 772e638a6ff..6f83979a4ce 100644 --- a/pkg/tools/benchmark/job/service/option.go +++ b/pkg/tools/benchmark/job/service/option.go @@ -141,12 +141,15 @@ func WithHdf5(d hdf5.Data) Option { } } -// WithDataset sets the config.BenchmarkDataset including benchmakr dataset name, group name of hdf5.Data, the number of index, start range and end range. +// WithDataset sets the config.BenchmarkDataset including benchmark dataset name, group name of hdf5.Data, the number of index, start range and end range, and original URL which is used for download user defined hdf5. func WithDataset(d *config.BenchmarkDataset) Option { return func(j *job) error { if d == nil { return errors.NewErrInvalidOption("dataset", d) } + if d.Name == hdf5.Original.String() && len(d.URL) == 0 { + return errors.NewErrInvalidOption("dataset", d) + } j.dataset = d return nil }