Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add broken index count metrics #2083

Merged
merged 8 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions charts/vald/values/dev-broken-index-backup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#
# Copyright (C) 2019-2023 vdaas.org vald team <vald@vdaas.org>
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Development values overlay: nightly images with pprof, gRPC interceptors and
# OTLP observability enabled for every component, plus an agent that keeps its
# NGT index on a persistent volume with a broken-index history limit set.
defaults:
  image:
    tag: nightly
  server_config:
    metrics:
      pprof:
        enabled: true
    servers:
      grpc:
        server:
          grpc:
            interceptors:
              - RecoverInterceptor
              - TraceInterceptor
              - MetricInterceptor
  grpc:
    client:
      dial_option:
        interceptors:
          - TraceInterceptor
  observability:
    enabled: true
    otlp:
      collector_endpoint: "opentelemetry-collector-collector.default.svc.cluster.local:4317"
    trace:
      enabled: true

gateway:
  lb:
    podAnnotations:
      profefe.com/enable: "true"
      profefe.com/port: "6060"
      profefe.com/service: "vald-lb-gateway"
    resources:
      requests:
        cpu: 100m
        memory: 50Mi

agent:
  podAnnotations:
    profefe.com/enable: "true"
    profefe.com/port: "6060"
    profefe.com/service: "vald-agent-ngt"
  minReplicas: 5
  maxReplicas: 10
  podManagementPolicy: Parallel
  resources:
    requests:
      cpu: 100m
      memory: 50Mi
  ngt:
    dimension: 784
    index_path: "/var/ngt/index"
    enable_in_memory_mode: false
    # NOTE(review): presumably the maximum number of broken index generations
    # the agent retains as backups - confirm against the chart's values docs.
    broken_index_history_limit: 3
  persistentVolume:
    enabled: true
    # For local-path-provisioner, we cannot use ReadWriteOncePod because it is not supported.
    accessMode: ReadWriteOnce
    storageClass: local-path
    size: 1Gi

discoverer:
  podAnnotations:
    profefe.com/enable: "true"
    profefe.com/port: "6060"
    profefe.com/service: "vald-discoverer"
  resources:
    requests:
      cpu: 100m
      memory: 50Mi

manager:
  index:
    podAnnotations:
      profefe.com/enable: "true"
      profefe.com/port: "6060"
      profefe.com/service: "vald-manager-index"
    resources:
      requests:
        cpu: 100m
        memory: 30Mi
4 changes: 1 addition & 3 deletions example/client/agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (
"context"
"encoding/json"
"flag"
"fmt"
"math"
"time"

Expand Down Expand Up @@ -134,10 +133,9 @@ func main() {
t := train[i]
var sum float64
for i := range r {
fmt.Println("r, t: ", r[i], t[i])
sum += math.Pow(float64(t[i]-r[i]), 2)
}
fmt.Println(sum)
glg.Infof("Euclidean distance of r and t: %v", sum)
}
glg.Info("Finish getting object")

Expand Down
24 changes: 24 additions & 0 deletions internal/observability/metrics/agent/core/ngt/ngt.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ const (

isSavingMetricsName = "agent_core_ngt_is_saving"
isSavingMetricsDescription = "Currently saving or not"

brokenIndexStoreCountMetricsName = "agent_core_ngt_broken_index_store_count"
brokenIndexStoreCountMetricsDescription = "How many broken index generations have been stored"
)

type ngtMetrics struct {
Expand Down Expand Up @@ -131,6 +134,15 @@ func (n *ngtMetrics) View() ([]*metrics.View, error) {
return nil, err
}

brokenIndexCount, err := view.New(
view.MatchInstrumentName(brokenIndexStoreCountMetricsName),
view.WithSetDescription(brokenIndexStoreCountMetricsDescription),
view.WithSetAggregation(aggregation.LastValue{}),
)
if err != nil {
return nil, err
}

return []*metrics.View{
&indexCount,
&uncommittedIndexCount,
Expand All @@ -140,6 +152,7 @@ func (n *ngtMetrics) View() ([]*metrics.View, error) {
&executedProactiveGCTotal,
&isIndexing,
&isSaving,
&brokenIndexCount,
}, nil
}

Expand Down Expand Up @@ -216,6 +229,15 @@ func (n *ngtMetrics) Register(m metrics.Meter) error {
return err
}

brokenIndexCount, err := m.AsyncInt64().Gauge(
brokenIndexStoreCountMetricsName,
metrics.WithDescription(brokenIndexStoreCountMetricsDescription),
metrics.WithUnit(metrics.Dimensionless),
)
if err != nil {
return err
}

return m.RegisterCallback(
[]metrics.AsynchronousInstrument{
indexCount,
Expand All @@ -226,6 +248,7 @@ func (n *ngtMetrics) Register(m metrics.Meter) error {
executedProactiveGCTotal,
isIndexing,
isSaving,
brokenIndexCount,
},
func(ctx context.Context) {
var indexing int64
Expand All @@ -246,6 +269,7 @@ func (n *ngtMetrics) Register(m metrics.Meter) error {
executedProactiveGCTotal.Observe(ctx, int64(n.ngt.NumberOfProactiveGCExecution()))
isIndexing.Observe(ctx, int64(indexing))
isSaving.Observe(ctx, int64(saving))
brokenIndexCount.Observe(ctx, int64(n.ngt.BrokenIndexCount()))
},
)
}
Loading