Skip to content
This repository has been archived by the owner on Feb 27, 2023. It is now read-only.

Commit

Permalink
Merge pull request #759 from yeya24/feature/add-supernode-metrics
Browse files Browse the repository at this point in the history
feature: add some supernode metrics
  • Loading branch information
starnop authored Aug 15, 2019
2 parents fb377a9 + f091250 commit dbb029d
Show file tree
Hide file tree
Showing 24 changed files with 553 additions and 188 deletions.
2 changes: 1 addition & 1 deletion dfdaemon/constant/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ const (
)

const (
// Namespace is the prefix of the metrics' name of dragonfly
// Namespace is the prefix of metrics namespace of dragonfly
Namespace = "dragonfly"
// Subsystem represents metrics for dfdaemon
Subsystem = "dfdaemon"
Expand Down
5 changes: 3 additions & 2 deletions dfdaemon/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/dragonflyoss/Dragonfly/version"

"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)

Expand Down Expand Up @@ -83,6 +84,8 @@ func New(opts ...Option) (*Server, error) {
},
proxy: p,
}
// register dfdaemon build information
version.NewBuildInfo("dfdaemon", prometheus.DefaultRegisterer)

for _, opt := range opts {
if err := opt(s); err != nil {
Expand Down Expand Up @@ -121,8 +124,6 @@ func (s *Server) Start() error {
} else {
logrus.Infof("start dfdaemon http server on %s", s.server.Addr)
}
// register dfdaemon build information
version.NewBuildInfo("dfdaemon")
return s.server.ListenAndServe()
}

Expand Down
9 changes: 9 additions & 0 deletions docs/user_guide/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ This doc contains all the metrics that Dragonfly components currently support. N
- dragonfly_supernode_http_request_duration_seconds{code, handler, method} - http request latency in seconds
- dragonfly_supernode_http_request_size_bytes{code, handler, method} - http request size in bytes
- dragonfly_supernode_http_response_size_bytes{code, handler, method} - http response size in bytes
- dragonfly_supernode_peers{peer} - dragonfly peers; the `peer` label consists of the hostname and IP address of a peer.
- dragonfly_supernode_tasks{cdnstatus} - dragonfly tasks
- dragonfly_supernode_tasks_registered_total{} - total times of registering new tasks. counter type.
- dragonfly_supernode_dfgettasks{callsystem, status} - dragonfly dfget tasks
- dragonfly_supernode_dfgettasks_registered_total{callsystem} - total times of registering new dfgettasks. counter type.
- dragonfly_supernode_dfgettasks_failed_total{callsystem} - total times of failed dfgettasks. counter type.
- dragonfly_supernode_schedule_duration_milliseconds{peer} - duration for task scheduling in milliseconds
- dragonfly_supernode_trigger_cdn_total{} - total times of triggering cdn.
- dragonfly_supernode_trigger_cdn_failed_total{} - total failed times of triggering cdn.

## Dfdaemon

Expand Down
2 changes: 1 addition & 1 deletion docs/user_guide/monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ We provide several functions to add metrics easily. Here is an example to add a
import "github.com/dragonflyoss/Dragonfly/pkg/util"

requestCounter := util.NewCounter("supernode", "http_requests_total",
"Counter of HTTP requests.", []string{"code"})
"Counter of HTTP requests.", []string{"code"}, nil)
requestCounter.WithLabelValues("200").Inc()
```

Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ require (
github.com/stretchr/testify v1.2.2
github.com/valyala/fasthttp v1.3.0
github.com/willf/bitset v0.0.0-20190228212526-18bd95f470f9
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 // indirect
gopkg.in/gcfg.v1 v1.2.3
gopkg.in/mgo.v2 v2.0.0-20160818020120-3f83fa500528 // indirect
gopkg.in/warnings.v0 v0.1.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73r
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190522155817-f3200d17e092 h1:4QSRKanuywn15aTZvI/mIDEgPQpswuFndXpOj3rKEco=
golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
Expand Down
103 changes: 103 additions & 0 deletions pkg/metricsutils/metrics_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright The Dragonfly Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package metricsutils

import (
"github.com/prometheus/client_golang/prometheus"
)

const (
namespace = "dragonfly"
)

// NewCounter creates a CounterVec in the dragonfly namespace under the given
// subsystem, registers it, and returns it.
//
// labels are the label names of the vector. When register is nil the metric is
// registered with prometheus.DefaultRegisterer. Registration goes through
// MustRegister, so a failed registration is not reported as an error value.
func NewCounter(subsystem, name, help string, labels []string, register prometheus.Registerer) *prometheus.CounterVec {
	opts := prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      name,
		Help:      help,
	}
	counter := prometheus.NewCounterVec(opts, labels)

	// Fall back to the process-wide registry when the caller did not pick one.
	target := register
	if target == nil {
		target = prometheus.DefaultRegisterer
	}
	target.MustRegister(counter)
	return counter
}

// NewGauge creates a GaugeVec in the dragonfly namespace under the given
// subsystem, registers it, and returns it.
//
// labels are the label names of the vector. When register is nil the metric is
// registered with prometheus.DefaultRegisterer. Registration goes through
// MustRegister, so a failed registration is not reported as an error value.
func NewGauge(subsystem, name, help string, labels []string, register prometheus.Registerer) *prometheus.GaugeVec {
	opts := prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      name,
		Help:      help,
	}
	gauge := prometheus.NewGaugeVec(opts, labels)

	// Fall back to the process-wide registry when the caller did not pick one.
	target := register
	if target == nil {
		target = prometheus.DefaultRegisterer
	}
	target.MustRegister(gauge)
	return gauge
}

// NewSummary creates a SummaryVec in the dragonfly namespace under the given
// subsystem, registers it, and returns it.
//
// labels are the label names of the vector and objectives is passed through
// unchanged as the summary's Objectives option. When register is nil the
// metric is registered with prometheus.DefaultRegisterer. Registration goes
// through MustRegister, so a failed registration is not reported as an error
// value.
func NewSummary(subsystem, name, help string, labels []string, objectives map[float64]float64, register prometheus.Registerer) *prometheus.SummaryVec {
	opts := prometheus.SummaryOpts{
		Namespace:  namespace,
		Subsystem:  subsystem,
		Name:       name,
		Help:       help,
		Objectives: objectives,
	}
	summary := prometheus.NewSummaryVec(opts, labels)

	// Fall back to the process-wide registry when the caller did not pick one.
	target := register
	if target == nil {
		target = prometheus.DefaultRegisterer
	}
	target.MustRegister(summary)
	return summary
}

// NewHistogram creates a HistogramVec in the dragonfly namespace under the
// given subsystem, registers it, and returns it.
//
// labels are the label names of the vector and buckets is passed through
// unchanged as the histogram's Buckets option. When register is nil the metric
// is registered with prometheus.DefaultRegisterer. Registration goes through
// MustRegister, so a failed registration is not reported as an error value.
func NewHistogram(subsystem, name, help string, labels []string, buckets []float64, register prometheus.Registerer) *prometheus.HistogramVec {
	opts := prometheus.HistogramOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      name,
		Help:      help,
		Buckets:   buckets,
	}
	histogram := prometheus.NewHistogramVec(opts, labels)

	// Fall back to the process-wide registry when the caller did not pick one.
	target := register
	if target == nil {
		target = prometheus.DefaultRegisterer
	}
	target.MustRegister(histogram)
	return histogram
}
5 changes: 5 additions & 0 deletions pkg/timeutils/time_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,8 @@ import (
func GetCurrentTimeMillis() int64 {
	// Integer division drops the sub-millisecond part of the nanosecond clock.
	nowNanos := time.Now().UnixNano()
	return nowNanos / int64(time.Millisecond)
}

// SinceInMilliseconds reports the time elapsed since start as a floating-point
// number of milliseconds, so fractions of a millisecond are preserved.
func SinceInMilliseconds(start time.Time) float64 {
	elapsed := time.Since(start)
	// A Duration is an integer nanosecond count, so dividing by
	// time.Millisecond converts nanoseconds to milliseconds.
	return float64(elapsed) / float64(time.Millisecond)
}
80 changes: 0 additions & 80 deletions pkg/util/metrics_util.go

This file was deleted.

3 changes: 2 additions & 1 deletion supernode/daemon/daemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/dragonflyoss/Dragonfly/supernode/server"

"github.com/go-openapi/strfmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)

Expand All @@ -47,7 +48,7 @@ func New(cfg *config.Config) (*Daemon, error) {
return nil, err
}

s, err := server.New(cfg)
s, err := server.New(cfg, prometheus.DefaultRegisterer)
if err != nil {
return nil, err
}
Expand Down
44 changes: 42 additions & 2 deletions supernode/daemon/mgr/dfgettask/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,27 +22,53 @@ import (

"github.com/dragonflyoss/Dragonfly/apis/types"
"github.com/dragonflyoss/Dragonfly/pkg/errortypes"
"github.com/dragonflyoss/Dragonfly/pkg/metricsutils"
"github.com/dragonflyoss/Dragonfly/pkg/stringutils"
"github.com/dragonflyoss/Dragonfly/pkg/syncmap"
"github.com/dragonflyoss/Dragonfly/supernode/config"
"github.com/dragonflyoss/Dragonfly/supernode/daemon/mgr"
dutil "github.com/dragonflyoss/Dragonfly/supernode/daemon/util"

"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
)

var _ mgr.DfgetTaskMgr = &Manager{}

// metrics groups the prometheus collectors that the dfget task Manager updates.
type metrics struct {
	// dfgetTasks is the gauge vector registered by newMetrics as "dfgettasks",
	// labelled by callsystem and status.
	dfgetTasks *prometheus.GaugeVec
	// dfgetTasksRegisterCount is the counter vector registered by newMetrics as
	// "dfgettasks_registered_total", labelled by callsystem.
	dfgetTasksRegisterCount *prometheus.CounterVec
	// dfgetTasksFailCount is the counter vector registered by newMetrics as
	// "dfgettasks_failed_total", labelled by callsystem.
	dfgetTasksFailCount *prometheus.CounterVec
}

// newMetrics builds the dfget task collectors under the supernode subsystem
// and registers each of them with register through the metricsutils helpers.
func newMetrics(register prometheus.Registerer) *metrics {
	subsystem := config.SubsystemSupernode

	// Gauge of dfget tasks, split by calling system and task status.
	tasksGauge := metricsutils.NewGauge(
		subsystem,
		"dfgettasks",
		"Current status of dfgettasks",
		[]string{"callsystem", "status"},
		register,
	)

	// Counter of dfget task registrations per calling system.
	registeredTotal := metricsutils.NewCounter(
		subsystem,
		"dfgettasks_registered_total",
		"Total times of registering dfgettasks",
		[]string{"callsystem"},
		register,
	)

	// Counter of failed dfget tasks per calling system.
	failedTotal := metricsutils.NewCounter(
		subsystem,
		"dfgettasks_failed_total",
		"Total failure times of dfgettasks",
		[]string{"callsystem"},
		register,
	)

	return &metrics{
		dfgetTasks:              tasksGauge,
		dfgetTasksRegisterCount: registeredTotal,
		dfgetTasksFailCount:     failedTotal,
	}
}

// Manager is an implementation of the interface of DfgetTaskMgr.
type Manager struct {
	// cfg is the supernode configuration; Add consults it (IsSuperPID /
	// IsSuperCID) to decide whether a dfget task should be counted in metrics.
	cfg *config.Config
	// dfgetTaskStore holds the registered dfget tasks by key.
	dfgetTaskStore *dutil.Store
	// ptoc stores a task's CID under the peer key generated from its PeerID
	// and TaskID.
	ptoc *syncmap.SyncMap
	// metrics holds the prometheus collectors created by newMetrics.
	metrics *metrics
}

// NewManager returns a new Manager.
func NewManager() (*Manager, error) {
func NewManager(cfg *config.Config, register prometheus.Registerer) (*Manager, error) {
return &Manager{
cfg: cfg,
dfgetTaskStore: dutil.NewStore(),
ptoc: syncmap.NewSyncMap(),
metrics: newMetrics(register),
}, nil
}

Expand Down Expand Up @@ -73,6 +99,13 @@ func (dtm *Manager) Add(ctx context.Context, dfgetTask *types.DfGetTask) error {

dtm.ptoc.Add(generatePeerKey(dfgetTask.PeerID, dfgetTask.TaskID), dfgetTask.CID)
dtm.dfgetTaskStore.Put(key, dfgetTask)

// If dfget task is created by supernode cdn, don't update metrics.
if !dtm.cfg.IsSuperPID(dfgetTask.PeerID) || !dtm.cfg.IsSuperCID(dfgetTask.CID) {
dtm.metrics.dfgetTasks.WithLabelValues(dfgetTask.CallSystem, dfgetTask.Status).Inc()
dtm.metrics.dfgetTasksRegisterCount.WithLabelValues(dfgetTask.CallSystem).Inc()
}

return nil
}

Expand Down Expand Up @@ -103,7 +136,7 @@ func (dtm *Manager) Delete(ctx context.Context, clientID, taskID string) error {
return err
}
dtm.ptoc.Delete(generatePeerKey(dfgetTask.PeerID, dfgetTask.TaskID))

dtm.metrics.dfgetTasks.WithLabelValues(dfgetTask.CallSystem, dfgetTask.Status).Dec()
return dtm.dfgetTaskStore.Delete(key)
}

Expand All @@ -115,9 +148,16 @@ func (dtm *Manager) UpdateStatus(ctx context.Context, clientID, taskID, status s
}

if dfgetTask.Status != types.DfGetTaskStatusSUCCESS {
dtm.metrics.dfgetTasks.WithLabelValues(dfgetTask.CallSystem, dfgetTask.Status).Dec()
dtm.metrics.dfgetTasks.WithLabelValues(dfgetTask.CallSystem, status).Inc()
dfgetTask.Status = status
}

// Add the total failed count.
if dfgetTask.Status == types.DfGetTaskStatusFAILED {
dtm.metrics.dfgetTasksFailCount.WithLabelValues(dfgetTask.CallSystem).Inc()
}

return nil
}

Expand Down
Loading

0 comments on commit dbb029d

Please sign in to comment.