vdaas · kpango · Feb 21, 2024 · Feb 16, 2024 · Feb 15, 2024 · Feb 15, 2024
diff --git a/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml b/charts/vald-benchmark-operator/crds/valdbenchmarkjob.yaml
@@ -277,9 +277,6 @@ spec:
                     - indexes
                     - group
                     - range
-                dimension:
-                  type: integer
-                  minimum: 1
                 global_config:
                   type: object
                   properties:

diff --git a/charts/vald-benchmark-operator/schemas/job-values.yaml b/charts/vald-benchmark-operator/schemas/job-values.yaml
@@ -47,9 +47,6 @@ dataset:
   # @schema {"name": "dataset.url", "type": "string"}
   # dataset.url -- the dataset url which is used for executing benchmark job with user defined hdf5 file
   url: ""
-# @schema {"name": "dimension", "type": "integer", "minimum": 1}
-# dimension -- vector dimension
-dimension: 784
 # @schema {"name": "replica", "type": "integer", "minimum": 1}
 # replica -- the number of running concurrency job
 replica: 1

diff --git a/charts/vald-benchmark-operator/schemas/scenario-values.yaml b/charts/vald-benchmark-operator/schemas/scenario-values.yaml
@@ -59,7 +59,6 @@ jobs:
       range:
         start: 1
         end: 1000
-    dimension: 784
     replica: 1
     repetition: 1
     job_type: "search"

diff --git a/charts/vald-benchmark-operator/values/benchmark-job.yaml b/charts/vald-benchmark-operator/values/benchmark-job.yaml
@@ -27,7 +27,6 @@ spec:
       start: 1
       end: 1000
   job_type: "search"
-  dimension: 784
   repetition: 1
   replica: 1
   rules: []

diff --git a/charts/vald-benchmark-operator/values/benchmark-scenario.yaml b/charts/vald-benchmark-operator/values/benchmark-scenario.yaml
@@ -45,7 +45,6 @@ spec:
   jobs:
     # @schema {"name": "jobs.items.dataset", "type": "object"}
     - job_type: "insert"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []
@@ -62,7 +61,6 @@ spec:
         health_check_duration: "10s"
       rps: 500
     - job_type: "update"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []
@@ -79,7 +77,6 @@ spec:
         health_check_duration: "10s"
       rps: 500
     - job_type: "search"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []
@@ -94,7 +91,6 @@ spec:
         health_check_duration: "10s"
       rps: 2000
     - job_type: "upsert"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []
@@ -111,7 +107,6 @@ spec:
         health_check_duration: "10s"
       rps: 1000
     - job_type: "search"
-      dimension: 784
       repetition: 2
       replica: 1
       rules: []
@@ -133,7 +128,6 @@ spec:
         health_check_duration: "10s"
       rps: 4000
     - job_type: "exists"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []
@@ -148,7 +142,6 @@ spec:
         health_check_duration: "10s"
       rps: 1000
     - job_type: "getobject"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []
@@ -163,7 +156,6 @@ spec:
         health_check_duration: "10s"
       rps: 1000
     - job_type: "remove"
-      dimension: 784
       repetition: 1
       replica: 1
       rules: []

diff --git a/docs/performance/continuos-benchmatk.md b/docs/performance/continuos-benchmatk.md
diff --git a/example/helm/benchmark/job-values.yaml b/example/helm/benchmark/job-values.yaml
@@ -0,0 +1,46 @@
+apiVersion: vald.vdaas.org/v1
+kind: ValdBenchmarkJob
+metadata:
+  name: sample-job
+spec:
+  # @schema {"name": "dataset", "type": "object"}
+  # dataset -- dataset information
+  dataset:
+    # @schema {"name": "dataset.name", "type": "string" }
+    # dataset.name -- the name of dataset
+    name: "fashion-mnist"
+    # @schema {"name": "dataset.indexes", "type": "integer"}
+    # dataset.indexes -- the amount of indexes
+    indexes: 60000
+    # @schema {"name": "dataset.group", "type": "string"}
+    # dataset.group -- the hdf5 group name of dataset
+    group: "train"
+    # @schema {"name": "dataset.range", "type": "object"}
+    # dataset.range -- the data range of indexes
+    range:
+      # @schema {"name": "dataset.range.start", "type": "integer"}
+      # dataset.range.start -- start index number
+      start: 1
+      # @schema {"name": "dataset.range.end", "type": "integer"}
+      # dataset.range.end -- end index number
+      end: 60000
+    url: "http://ann-benchmarks.com/fashion-mnist-784-euclidean.hdf5"
+  job_type: "upsert"
+  repetition: 1
+  replica: 1
+  rules: []
+  client_config:
+    health_check_duration: "10s"
+  rps: 2000
+  insert_config:
+    skip_strict_exist_check: false
+  upsert_config:
+    skip_strict_exist_check: false
+  target:
+    host: "vald-lb-gateway.default.svc.cluster.local"
+    port: 8081
+  global_config:
+    version: v0.0.1
+    time_zone: JST
+    logging:
+      level: debug
diff --git a/example/helm/benchmark/scenario-values.yaml b/example/helm/benchmark/scenario-values.yaml
@@ -0,0 +1,183 @@
+apiVersion: vald.vdaas.org/v1
+kind: ValdBenchmarkScenario
+metadata:
+  name: insert-search
+spec:
+  # @schema {"name": "dataset", "type": "object"}
+  # dataset -- dataset information
+  dataset:
+    # @schema {"name": "dataset.name", "type": "string" }
+    # dataset.name -- the name of dataset
+    name: "fashion-mnist"
+    # @schema {"name": "dataset.indexes", "type": "integer"}
+    # dataset.indexes -- the amount of indexes
+    indexes: 100000
+    # @schema {"name": "dataset.group", "type": "string"}
+    # dataset.group -- the hdf5 group name of dataset
+    group: "train"
+    # @schema {"name": "dataset.range", "type": "object"}
+    # dataset.range -- the data range of indexes
+    range:
+      # @schema {"name": "dataset.range.start", "type": "integer"}
+      # dataset.range.start -- start index number
+      start: 1
+      # @schema {"name": "dataset.range.end", "type": "integer"}
+      # dataset.range.end -- end index number
+      end: 100000
+  # @schema {"name": "jobs", "type": "array", "items": {"type": "object"}}
+  # jobs -- benchmark jobs
+  jobs:
+    - job_type: "insert"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 10000
+        group: "train"
+        range:
+          start: 1
+          end: 10000
+      insert_config:
+        skip_strict_exist_check: true
+      client_config:
+        health_check_duration: "10s"
+      rps: 2000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "upsert"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 20000
+        group: "train"
+        range:
+          start: 1
+          end: 20000
+      upsert_config:
+        skip_strict_exist_check: true
+      client_config:
+        health_check_duration: "10s"
+      rps: 2000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "search"
+      repetition: 1
+      replica: 1
+      rules: []
+      search_config:
+        epsilon: 0.1
+        radius: -1
+        num: 10
+        min_num: 10
+        timeout: "1m"
+        enable_linear_search: false
+      client_config:
+        health_check_duration: "10s"
+      rps: 2000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "upsert"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 30000
+        group: "train"
+        range:
+          start: 10001
+          end: 40000
+      search_config:
+        epsilon: 0.1
+        radius: -1
+        num: 10
+        min_num: 5
+        timeout: "1m"
+      upsert_config:
+        skip_strict_exist_check: true
+      client_config:
+        health_check_duration: "10s"
+      rps: 2000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "search"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 20000
+        group: "test"
+        range:
+          start: 1
+          end: 20000
+      search_config:
+        epsilon: 0.1
+        radius: -1
+        num: 10
+        min_num: 10
+        timeout: "1m"
+        enable_linear_search: false
+      client_config:
+        health_check_duration: "10s"
+      rps: 4000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "exists"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 20000
+        group: "train"
+        range:
+          start: 1
+          end: 20000
+      client_config:
+        health_check_duration: "10s"
+      rps: 1000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "getobject"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 20000
+        group: "train"
+        range:
+          start: 1
+          end: 20000
+      client_config:
+        health_check_duration: "10s"
+      rps: 1000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+    - job_type: "remove"
+      repetition: 1
+      replica: 1
+      rules: []
+      dataset:
+        name: "fashion-mnist"
+        indexes: 40000
+        group: "train"
+        range:
+          start: 1
+          end: 40000
+      remove_config:
+        skip_strict_exist_check: true
+      client_config:
+        health_check_duration: "10s"
+      rps: 1000
+      concurrency_limit: 150
+      ttl_seconds_after_finished: 100
+  # @schema {"name": "target", "type": "object"}
+  # target -- host and port of the target cluster
+  target:
+    host: "vald-lb-gateway.default.svc.cluster.local"
+    port: 8081
diff --git a/internal/config/benchmark.go b/internal/config/benchmark.go
@@ -21,7 +21,6 @@ package config
 type BenchmarkJob struct {
 	Target             *BenchmarkTarget    `json:"target,omitempty"               yaml:"target"`
 	Dataset            *BenchmarkDataset   `json:"dataset,omitempty"              yaml:"dataset"`
-	Dimension          int                 `json:"dimension,omitempty"            yaml:"dimension"`
 	Replica            int                 `json:"replica,omitempty"              yaml:"replica"`
 	Repetition         int                 `json:"repetition,omitempty"           yaml:"repetition"`
 	JobType            string              `json:"job_type,omitempty"             yaml:"job_type"`

diff --git a/internal/k8s/vald/benchmark/api/v1/job_types.go b/internal/k8s/vald/benchmark/api/v1/job_types.go
@@ -28,7 +28,6 @@ type BenchmarkJobSpec struct {
 	ServerConfig            *config.Servers            `json:"server_config,omitempty"              yaml:"server_config"`
 	Target                  *BenchmarkTarget           `json:"target,omitempty"                     yaml:"target"`
 	Dataset                 *BenchmarkDataset          `json:"dataset,omitempty"                    yaml:"dataset"`
-	Dimension               int                        `json:"dimension,omitempty"                  yaml:"dimension"`
 	Replica                 int                        `json:"replica,omitempty"                    yaml:"replica"`
 	Repetition              int                        `json:"repetition,omitempty"                 yaml:"repetition"`
 	JobType                 string                     `json:"job_type,omitempty"                   yaml:"job_type"`

diff --git a/internal/k8s/vald/benchmark/job/job_template_option.go b/internal/k8s/vald/benchmark/job/job_template_option.go
@@ -64,13 +64,21 @@ func WithImagePullPolicy(p ImagePullPolicy) BenchmarkJobTplOption {
 // BenchmarkJobOption represents the option for create benchmark job template.
 type BenchmarkJobOption func(b *jobs.Job) error
 
-// defaultTTLSeconds represents the default TTLSecondsAfterFinished for benchmark job template.
-const defaultTTLSeconds int32 = 600
+const (
+	// defaultTTLSeconds is the TTLSecondsAfterFinished value, in seconds, that defaultBenchmarkJobOpts applies to the benchmark job template.
+	defaultTTLSeconds int32 = 600
+	// defaultCompletions is the completions value that defaultBenchmarkJobOpts applies to the benchmark job template.
+	defaultCompletions int32 = 1
+	// defaultParallelism is the parallelism value that defaultBenchmarkJobOpts applies to the benchmark job template.
+	defaultParallelism int32 = 1
+)
 
 var defaultBenchmarkJobOpts = []BenchmarkJobOption{
 	WithSvcAccountName(svcAccount),
 	WithRestartPolicy(RestartPolicyNever),
 	WithTTLSecondsAfterFinished(defaultTTLSeconds),
+	WithCompletions(defaultCompletions),
+	WithParallelism(defaultParallelism),
 }
 
 // WithSvcAccountName sets the service account name for benchmark job.

diff --git a/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml b/k8s/tools/benchmark/operator/crds/valdbenchmarkjob.yaml
@@ -277,9 +277,6 @@ spec:
                     - indexes
                     - group
                     - range
-                dimension:
-                  type: integer
-                  minimum: 1
                 global_config:
                   type: object
                   properties:

diff --git a/pkg/tools/benchmark/job/service/job.go b/pkg/tools/benchmark/job/service/job.go
@@ -82,7 +82,6 @@ func (jt jobType) String() string {
 
 type job struct {
 	eg                 errgroup.Group
-	dimension          int
 	dataset            *config.BenchmarkDataset
 	jobType            jobType
 	jobFunc            func(context.Context, chan error) error