Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add snapstore bucket-related metrics #211

Merged
merged 1 commit into from
Apr 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions doc/usage/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,17 @@ Two major steps in initialization of etcd data directory are validation and rest
| etcdbr_validation_duration_seconds | Total latency distribution of validating data directory. | Histogram |
| etcdbr_restoration_duration_seconds | Total latency distribution of restoring from snapshot. | Histogram |

### Snapstore

These bucket-related metrics provide information about the latest set of delta snapshots stored in the snapstore. They can be used to roughly estimate the time required to restore from the latest set of snapshots.

| Name | Description | Type |
|------|-------------|------|
| etcdbr_snapstore_latest_deltas_total | Total number of delta snapshots taken since the latest full snapshot. | Gauge |
| etcdbr_snapstore_latest_deltas_revisions_total | Total number of revisions stored in delta snapshots taken since the latest full snapshot. | Gauge |

`etcdbr_snapstore_latest_deltas_revisions_total` indicates the total number of etcd revisions (events) stored in the latest set of delta snapshots. The amount of time it would take to perform an etcd data restoration with the latest set of snapshots is directly proportional to this value.

### Network

These metrics describe the status of the network usage. We use `/proc/<etcdbr-pid>/net/dev` to get network usage details for the etcdbr process. Currently these metrics are only supported on linux-based distributions.
Expand Down
35 changes: 33 additions & 2 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,9 @@ const (
// LabelKind is a metrics label that indicates the kind of snapshot associated with a metric.
LabelKind = "kind"

namespaceEtcdBR = "etcdbr"
subsystemSnapshot = "snapshot"
namespaceEtcdBR = "etcdbr"
subsystemSnapshot = "snapshot"
subsystemSnapstore = "snapstore"
)

var (
Expand Down Expand Up @@ -131,6 +132,27 @@ var (
},
[]string{LabelSucceeded},
)

// SnapstoreLatestDeltasTotal is the metric that exposes the total number of delta snapshots taken since the latest full snapshot.
SnapstoreLatestDeltasTotal = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespaceEtcdBR,
Subsystem: subsystemSnapstore,
Name: "latest_deltas_total",
Help: "Total number of delta snapshots taken since the latest full snapshot.",
},
[]string{},
)
// SnapstoreLatestDeltasRevisionsTotal is the metric that exposes the total number of revisions stored in delta snapshots taken since the latest full snapshot.
SnapstoreLatestDeltasRevisionsTotal = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespaceEtcdBR,
Subsystem: subsystemSnapstore,
Name: "latest_deltas_revisions_total",
Help: "Total number of revisions stored in delta snapshots taken since the latest full snapshot.",
},
[]string{},
)
)

// generateLabelCombinations generates combinations of label values for metrics
Expand Down Expand Up @@ -291,6 +313,12 @@ func init() {
DefragmentationDurationSeconds.With(prometheus.Labels(combination))
}

// SnapstoreLatestDeltasTotal
SnapstoreLatestDeltasTotal.With(prometheus.Labels(map[string]string{}))

// SnapstoreLatestDeltasRevisionsTotal
SnapstoreLatestDeltasRevisionsTotal.With(prometheus.Labels(map[string]string{}))

// Metrics have to be registered to be exposed:
prometheus.MustRegister(GCSnapshotCounter)

Expand All @@ -302,4 +330,7 @@ func init() {
prometheus.MustRegister(RestorationDurationSeconds)
prometheus.MustRegister(ValidationDurationSeconds)
prometheus.MustRegister(DefragmentationDurationSeconds)

prometheus.MustRegister(SnapstoreLatestDeltasTotal)
prometheus.MustRegister(SnapstoreLatestDeltasRevisionsTotal)
}
23 changes: 18 additions & 5 deletions pkg/miscellaneous/miscellaneous.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,17 @@ package miscellaneous
import (
"sort"

"github.com/gardener/etcd-backup-restore/pkg/metrics"
"github.com/gardener/etcd-backup-restore/pkg/snapstore"
"github.com/prometheus/client_golang/prometheus"
)

// GetLatestFullSnapshotAndDeltaSnapList returns the latest full snapshot (nil if none is found) and the sorted list of delta snapshots collected alongside it.
func GetLatestFullSnapshotAndDeltaSnapList(store snapstore.SnapStore) (*snapstore.Snapshot, snapstore.SnapList, error) {
var deltaSnapList snapstore.SnapList
var (
fullSnapshot *snapstore.Snapshot
deltaSnapList snapstore.SnapList
)
snapList, err := store.List()
if err != nil {
return nil, nil, err
Expand All @@ -33,11 +38,19 @@ func GetLatestFullSnapshotAndDeltaSnapList(store snapstore.SnapStore) (*snapstor
continue
}
if snapList[index-1].Kind == snapstore.SnapshotKindFull {
sort.Sort(deltaSnapList)
return snapList[index-1], deltaSnapList, nil
fullSnapshot = snapList[index-1]
break
}
deltaSnapList = append(deltaSnapList, snapList[index-1])
}
sort.Sort(deltaSnapList) //added to ensure the list is well formed for only deltasnapshots scenarios as well
return nil, deltaSnapList, nil

sort.Sort(deltaSnapList) // ensures that the delta snapshot list is well formed
metrics.SnapstoreLatestDeltasTotal.With(prometheus.Labels{}).Set(float64(len(deltaSnapList)))
if len(deltaSnapList) == 0 {
metrics.SnapstoreLatestDeltasRevisionsTotal.With(prometheus.Labels{}).Set(0)
} else {
revisionDiff := deltaSnapList[len(deltaSnapList)-1].LastRevision - deltaSnapList[0].StartRevision
metrics.SnapstoreLatestDeltasRevisionsTotal.With(prometheus.Labels{}).Set(float64(revisionDiff))
}
return fullSnapshot, deltaSnapList, nil
}
4 changes: 4 additions & 0 deletions pkg/snapshot/snapshotter/snapshotter.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ func (ssr *Snapshotter) takeFullSnapshot() (*snapstore.Snapshot, error) {

metrics.LatestSnapshotRevision.With(prometheus.Labels{metrics.LabelKind: ssr.prevSnapshot.Kind}).Set(float64(ssr.prevSnapshot.LastRevision))
metrics.LatestSnapshotTimestamp.With(prometheus.Labels{metrics.LabelKind: ssr.prevSnapshot.Kind}).Set(float64(ssr.prevSnapshot.CreatedOn.Unix()))
metrics.SnapstoreLatestDeltasTotal.With(prometheus.Labels{}).Set(0)
metrics.SnapstoreLatestDeltasRevisionsTotal.With(prometheus.Labels{}).Set(0)

ssr.logger.Infof("Successfully saved full snapshot at: %s", path.Join(s.SnapDir, s.SnapName))
}
Expand Down Expand Up @@ -340,6 +342,8 @@ func (ssr *Snapshotter) TakeDeltaSnapshot() (*snapstore.Snapshot, error) {
metrics.LatestSnapshotRevision.With(prometheus.Labels{metrics.LabelKind: ssr.prevSnapshot.Kind}).Set(float64(ssr.prevSnapshot.LastRevision))
metrics.LatestSnapshotTimestamp.With(prometheus.Labels{metrics.LabelKind: ssr.prevSnapshot.Kind}).Set(float64(ssr.prevSnapshot.CreatedOn.Unix()))
metrics.SnapshotRequired.With(prometheus.Labels{metrics.LabelKind: snapstore.SnapshotKindDelta}).Set(0)
metrics.SnapstoreLatestDeltasTotal.With(prometheus.Labels{}).Inc()
metrics.SnapstoreLatestDeltasRevisionsTotal.With(prometheus.Labels{}).Add(float64(snap.LastRevision - snap.StartRevision))
ssr.logger.Infof("Successfully saved delta snapshot at: %s", path.Join(snap.SnapDir, snap.SnapName))
return snap, nil
}
Expand Down