vdaas · vankichi · Jun 26, 2023 · Jun 21, 2023 · Jun 21, 2023 · Jun 21, 2023
diff --git a/charts/vald/values/dev-broken-index-backup.yaml b/charts/vald/values/dev-broken-index-backup.yaml
@@ -0,0 +1,98 @@
+#
+# Copyright (C) 2019-2023 vdaas.org vald team <[email protected]>
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+defaults:
+  image:
+    tag: nightly
+  server_config:
+    metrics:
+      pprof:
+        enabled: true
+    servers:
+      grpc:
+        server:
+          grpc:
+            interceptors:
+              - RecoverInterceptor
+              - TraceInterceptor
+              - MetricInterceptor
+  grpc:
+    client:
+      dial_option:
+        interceptors:
+          - TraceInterceptor
+  observability:
+    enabled: true
+    otlp:
+      collector_endpoint: "opentelemetry-collector-collector.default.svc.cluster.local:4317"
+    trace:
+      enabled: true
+
+gateway:
+  lb:
+    podAnnotations:
+      profefe.com/enable: "true"
+      profefe.com/port: "6060"
+      profefe.com/service: "vald-lb-gateway"
+    resources:
+      requests:
+        cpu: 100m
+        memory: 50Mi
+
+agent:
+  podAnnotations:
+    profefe.com/enable: "true"
+    profefe.com/port: "6060"
+    profefe.com/service: "vald-agent-ngt"
+  minReplicas: 5
+  maxReplicas: 10
+  podManagementPolicy: Parallel
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+  ngt:
+    dimension: 784
+    index_path: "/var/ngt/index"
+    enable_in_memory_mode: false
+    broken_index_history_limit: 3
+  persistentVolume:
+    enabled: true
+    # local-path-provisioner does not support the ReadWriteOncePod access mode, so ReadWriteOnce is used instead.
+    accessMode: ReadWriteOnce
+    storageClass: local-path
+    size: 1Gi
+
+discoverer:
+  podAnnotations:
+    profefe.com/enable: "true"
+    profefe.com/port: "6060"
+    profefe.com/service: "vald-discoverer"
+  resources:
+    requests:
+      cpu: 100m
+      memory: 50Mi
+
+manager:
+  index:
+    podAnnotations:
+      profefe.com/enable: "true"
+      profefe.com/port: "6060"
+      profefe.com/service: "vald-manager-index"
+    resources:
+      requests:
+        cpu: 100m
+        memory: 30Mi
diff --git a/example/client/agent/main.go b/example/client/agent/main.go
@@ -17,7 +17,6 @@ import (
 	"context"
 	"encoding/json"
 	"flag"
-	"fmt"
 	"math"
 	"time"
 
@@ -134,10 +133,9 @@ func main() {
 		t := train[i]
 		var sum float64
 		for i := range r {
-			fmt.Println("r, t: ", r[i], t[i])
 			sum += math.Pow(float64(t[i]-r[i]), 2)
 		}
-		fmt.Println(sum)
+		glg.Infof("Euclidean distance of r and t: %v", sum)
 	}
 	glg.Info("Finish getting object")
 

diff --git a/internal/observability/metrics/agent/core/ngt/ngt.go b/internal/observability/metrics/agent/core/ngt/ngt.go
@@ -46,6 +46,9 @@ const (
 
 	isSavingMetricsName        = "agent_core_ngt_is_saving"
 	isSavingMetricsDescription = "Currently saving or not"
+
+	brokenIndexStoreCountMetricsName        = "agent_core_ngt_broken_index_store_count"
+	brokenIndexStoreCountMetricsDescription = "How many broken index generations have been stored"
 )
 
 type ngtMetrics struct {
@@ -131,6 +134,15 @@ func (n *ngtMetrics) View() ([]*metrics.View, error) {
 		return nil, err
 	}
 
+	brokenIndexCount, err := view.New(
+		view.MatchInstrumentName(brokenIndexStoreCountMetricsName),
+		view.WithSetDescription(brokenIndexStoreCountMetricsDescription),
+		view.WithSetAggregation(aggregation.LastValue{}),
+	)
+	if err != nil {
+		return nil, err
+	}
+
 	return []*metrics.View{
 		&indexCount,
 		&uncommittedIndexCount,
@@ -140,6 +152,7 @@ func (n *ngtMetrics) View() ([]*metrics.View, error) {
 		&executedProactiveGCTotal,
 		&isIndexing,
 		&isSaving,
+		&brokenIndexCount,
 	}, nil
 }
 
@@ -216,6 +229,15 @@ func (n *ngtMetrics) Register(m metrics.Meter) error {
 		return err
 	}
 
+	brokenIndexCount, err := m.AsyncInt64().Gauge(
+		brokenIndexStoreCountMetricsName,
+		metrics.WithDescription(brokenIndexStoreCountMetricsDescription),
+		metrics.WithUnit(metrics.Dimensionless),
+	)
+	if err != nil {
+		return err
+	}
+
 	return m.RegisterCallback(
 		[]metrics.AsynchronousInstrument{
 			indexCount,
@@ -226,6 +248,7 @@ func (n *ngtMetrics) Register(m metrics.Meter) error {
 			executedProactiveGCTotal,
 			isIndexing,
 			isSaving,
+			brokenIndexCount,
 		},
 		func(ctx context.Context) {
 			var indexing int64
@@ -246,6 +269,7 @@ func (n *ngtMetrics) Register(m metrics.Meter) error {
 			executedProactiveGCTotal.Observe(ctx, int64(n.ngt.NumberOfProactiveGCExecution()))
 			isIndexing.Observe(ctx, int64(indexing))
 			isSaving.Observe(ctx, int64(saving))
+			brokenIndexCount.Observe(ctx, int64(n.ngt.BrokenIndexCount()))
 		},
 	)
 }