Skip to content

Commit

Permalink
♻️ Add download original dataset URL option
Browse files Browse the repository at this point in the history
Signed-off-by: vankichi <[email protected]>
  • Loading branch information
vankichi committed Jun 1, 2023
1 parent d35d7b4 commit 24ce258
Show file tree
Hide file tree
Showing 14 changed files with 69 additions and 12 deletions.
6 changes: 6 additions & 0 deletions charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ spec:
name:
type: string
enum:
- original
- fashion-mnist
range:
type: object
Expand All @@ -262,6 +263,11 @@ spec:
start:
type: integer
minimum: 1
required:
- start
- end
url:
type: string
required:
- name
- indexes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ spec:
start:
type: integer
minimum: 1
required:
- start
- end
url:
type: string
required:
- name
- indexes
Expand Down
9 changes: 7 additions & 2 deletions charts/vald-benchmark-operator/job-values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@
"name": {
"type": "string",
"description": "the name of dataset",
"enum": ["fashion-mnist"]
"enum": ["original", "fashion-mnist"]
},
"range": {
"type": "object",
Expand All @@ -321,7 +321,12 @@
"description": "start index number",
"minimum": 1
}
}
},
"required": ["start", "end"]
},
"url": {
"type": "string",
"description": "the dataset url which is used for executing benchmark job with user defined hdf5 file"
}
},
"required": ["name", "indexes", "group", "range"]
Expand Down
7 changes: 6 additions & 1 deletion charts/vald-benchmark-operator/scenario-values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@
"description": "start index number",
"minimum": 1
}
}
},
"required": ["start", "end"]
},
"url": {
"type": "string",
"description": "the dataset url which is used for executing benchmark job with user defined hdf5 file"
}
},
"required": ["name", "indexes", "group", "range"]
Expand Down
7 changes: 5 additions & 2 deletions charts/vald-benchmark-operator/schemas/job-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ target:
# @schema {"name": "dataset", "type": "object", "required": ["name", "indexes", "group", "range"]}
# dataset -- dataset information
dataset:
# @schema {"name": "dataset.name", "type": "string", "enum": ["fashion-mnist"] }
# @schema {"name": "dataset.name", "type": "string", "enum": ["original", "fashion-mnist"] }
# dataset.name -- the name of dataset
name: "fashion-mnist"
# @schema {"name": "dataset.indexes", "type": "integer", "minimum": 0}
Expand All @@ -36,7 +36,7 @@ dataset:
# @schema {"name": "dataset.group", "type": "string", "minLength": 1}
# dataset.group -- the hdf5 group name of dataset
group: "test"
# @schema {"name": "dataset.range", "type": "object", "range": ["start", "port"]}
# @schema {"name": "dataset.range", "type": "object", "required": ["start", "end"]}
# dataset.range -- the data range of indexes
range:
# @schema {"name": "dataset.range.start", "type": "integer", "minimum": 1}
Expand All @@ -45,6 +45,9 @@ dataset:
# @schema {"name": "dataset.range.end", "type": "integer", "minimum": 1}
# dataset.range.end -- end index number
end: 1000
# @schema {"name": "dataset.url", "type": "string"}
# dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file
url: ""
# @schema {"name": "dimension", "type": "integer", "minimum": 1}
# dimension -- vector dimension
dimension: 784
Expand Down
5 changes: 4 additions & 1 deletion charts/vald-benchmark-operator/schemas/scenario-values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ dataset:
# @schema {"name": "dataset.group", "type": "string", "minLength": 1}
# dataset.group -- the hdf5 group name of dataset
group: "test"
# @schema {"name": "dataset.range", "type": "object", "range": ["start", "port"]}
# @schema {"name": "dataset.range", "type": "object", "required": ["start", "end"]}
# dataset.range -- the data range of indexes
range:
# @schema {"name": "dataset.range.start", "type": "integer", "minimum": 1}
Expand All @@ -46,6 +46,9 @@ dataset:
# @schema {"name": "dataset.range.end", "type": "integer", "minimum": 1}
# dataset.range.end -- end index number
end: 1000
# @schema {"name": "dataset.url", "type": "string"}
# dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file
url: ""

# @schema {"name": "jobs", "type": "array", "items": {"type": "object"}}
jobs:
Expand Down
3 changes: 3 additions & 0 deletions internal/config/benchmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,13 @@ type BenchmarkDataset struct {
Group string `json:"group,omitempty"`
Indexes int `json:"indexes,omitempty"`
Range *BenchmarkDatasetRange `json:"range,omitempty"`
URL string `json:"url,omitempty"`
}

// Bind passes the Name, Group and URL fields of d through GetActualValue,
// writes the results back onto d, and returns the same pointer so that the
// call can be chained.
// NOTE(review): GetActualValue is defined outside this view; presumably it
// resolves placeholder values to their effective value — confirm.
func (d *BenchmarkDataset) Bind() *BenchmarkDataset {
	// Resolve all three fields in one tuple assignment; each field only
	// depends on its own previous value.
	d.Name, d.Group, d.URL = GetActualValue(d.Name), GetActualValue(d.Group), GetActualValue(d.URL)
	return d
}

Expand Down
11 changes: 8 additions & 3 deletions internal/test/data/hdf5/hdf5.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
)

type Data interface {
Download() error
Download(url string) error
Read() error
GetName() DatasetName
GetPath() string
Expand All @@ -41,11 +41,14 @@ type Data interface {
type DatasetName int

const (
FashionMNIST784Euclidean DatasetName = iota
Original DatasetName = iota
FashionMNIST784Euclidean
)

func (d DatasetName) String() string {
switch d {
case Original:
return "original"
case FashionMNIST784Euclidean:
return "fashion-mnist-784-euc"
default:
Expand Down Expand Up @@ -109,8 +112,10 @@ func New(opts ...Option) (Data, error) {

// Download downloads the hdf5 file.
// https://github.com/erikbern/ann-benchmarks/#data-sets
func (d *data) Download() error {
func (d *data) Download(url string) error {
switch d.name {
case Original:
return downloadFile(url, d.path)
case FashionMNIST784Euclidean:
return downloadFile(FashionMNIST784EuclideanUrl.String(), d.path)
default:
Expand Down
6 changes: 5 additions & 1 deletion internal/test/data/hdf5/hdf5_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,12 +353,16 @@ func Test_data_Download(t *testing.T) {
test [][]float32
neighbors [][]int
}
type args struct {
url string
}
type want struct {
err error
}
type test struct {
name string
fields fields
args args
want want
checkFunc func(want, error) error
beforeFunc func(*testing.T)
Expand Down Expand Up @@ -440,7 +444,7 @@ func Test_data_Download(t *testing.T) {
neighbors: test.fields.neighbors,
}

err := d.Download()
err := d.Download(test.args.url)
if err := checkFunc(test.want, err); err != nil {
tt.Errorf("error = %v", err)
}
Expand Down
4 changes: 4 additions & 0 deletions internal/test/data/hdf5/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ var defaultOptions = []Option{
func WithNameByString(n string) Option {
var name DatasetName
switch n {
case Original.String():
name = Original
case FashionMNIST784Euclidean.String():
name = FashionMNIST784Euclidean
}
Expand All @@ -40,6 +42,8 @@ func WithNameByString(n string) Option {
func WithName(dn DatasetName) Option {
return func(d *data) error {
switch dn {
case Original:
d.name = dn
case FashionMNIST784Euclidean:
d.name = dn
default:
Expand Down
6 changes: 6 additions & 0 deletions k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ spec:
name:
type: string
enum:
- original
- fashion-mnist
range:
type: object
Expand All @@ -262,6 +263,11 @@ spec:
start:
type: integer
minimum: 1
required:
- start
- end
url:
type: string
required:
- name
- indexes
Expand Down
5 changes: 5 additions & 0 deletions k8s/tools/benchmark/operator/crds/valdbenchmarkscenario.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ spec:
start:
type: integer
minimum: 1
required:
- start
- end
url:
type: string
required:
- name
- indexes
Expand Down
2 changes: 1 addition & 1 deletion pkg/tools/benchmark/job/service/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func New(opts ...Option) (Job, error) {

func (j *job) PreStart(ctx context.Context) error {
log.Infof("[benchmark job] start download dataset of %s", j.hdf5.GetName().String())
if err := j.hdf5.Download(); err != nil {
if err := j.hdf5.Download(j.dataset.URL); err != nil {
return err
}
log.Infof("[benchmark job] success download dataset of %s", j.hdf5.GetName().String())
Expand Down
5 changes: 4 additions & 1 deletion pkg/tools/benchmark/job/service/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,12 +141,15 @@ func WithHdf5(d hdf5.Data) Option {
}
}

// WithDataset sets the config.BenchmarkDataset including benchmakr dataset name, group name of hdf5.Data, the number of index, start range and end range.
// WithDataset sets the config.BenchmarkDataset, which includes the benchmark dataset name, the group name of hdf5.Data, the number of indexes, the start and end of the range, and the original URL used to download a user-defined hdf5 file.
func WithDataset(d *config.BenchmarkDataset) Option {
return func(j *job) error {
if d == nil {
return errors.NewErrInvalidOption("dataset", d)
}
if d.Name == hdf5.Original.String() && len(d.URL) == 0 {
return errors.NewErrInvalidOption("dataset", d)
}
j.dataset = d
return nil
}
Expand Down

0 comments on commit 24ce258

Please sign in to comment.