Skip to content

Commit

Permalink
[receiver/vcenter] Adds vSAN metrics for Virtual Machines (open-telem…
Browse files Browse the repository at this point in the history
…etry#34082)

**Description:** <Describe what has changed.>

Adds a set of vSAN metrics for Virtual Machines.
```
vcenter.vm.vsan.throughput          (direction={read/write})
vcenter.vm.vsan.iops                (direction={read/write})
vcenter.vm.vsan.latency.avg         (direction={read/write})
```

**Link to tracking Issue:** <Issue number if applicable>
open-telemetry#33556 

**Testing:** <Describe what testing was performed and which tests were
added.>
Unit tests added for scraper.
Could not add client tests because the `govmomi` vSAN simulator is not
currently implemented.
Tested against live environment.

**Documentation:** <Describe the documentation added.>
New documentation generated
  • Loading branch information
StefanKurek authored Jul 26, 2024
1 parent 1311344 commit 51f4a89
Show file tree
Hide file tree
Showing 21 changed files with 1,142 additions and 19 deletions.
27 changes: 27 additions & 0 deletions .chloggen/vcenterreceiver-vm-vsan.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'enhancement'

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: 'vcenterreceiver'

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Adds a number of vSAN metrics for Virtual Machines, all disabled by default.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [33556]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dist/
# Miscellaneous files
*.sw[op]
*.DS_Store
__debug_bin*

# Coverage
coverage/*
Expand Down
2 changes: 2 additions & 0 deletions cmd/otelcontribcol/go.sum

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

274 changes: 263 additions & 11 deletions receiver/vcenterreceiver/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,35 +7,45 @@ import (
"context"
"errors"
"fmt"
"maps"
"net/url"
"reflect"
"strconv"
"strings"
"time"

"github.com/vmware/govmomi"
"github.com/vmware/govmomi/find"
"github.com/vmware/govmomi/object"
"github.com/vmware/govmomi/performance"
"github.com/vmware/govmomi/property"
"github.com/vmware/govmomi/view"
"github.com/vmware/govmomi/vim25"
"github.com/vmware/govmomi/vim25/mo"
"github.com/vmware/govmomi/vim25/soap"
vt "github.com/vmware/govmomi/vim25/types"
"github.com/vmware/govmomi/vsan"
"github.com/vmware/govmomi/vsan/types"
"go.uber.org/zap"
)

// vcenterClient is a client that collects data from a vCenter endpoint.
type vcenterClient struct {
moClient *govmomi.Client
vimDriver *vim25.Client
finder *find.Finder
pc *property.Collector
pm *performance.Manager
vm *view.Manager
cfg *Config
logger *zap.Logger
moClient *govmomi.Client
vimDriver *vim25.Client
vsanDriver *vsan.Client
finder *find.Finder
pm *performance.Manager
vm *view.Manager
cfg *Config
}

var newVcenterClient = defaultNewVcenterClient

func defaultNewVcenterClient(c *Config) *vcenterClient {
func defaultNewVcenterClient(l *zap.Logger, c *Config) *vcenterClient {
return &vcenterClient{
cfg: c,
logger: l,
cfg: c,
}
}

Expand Down Expand Up @@ -70,10 +80,15 @@ func (vc *vcenterClient) EnsureConnection(ctx context.Context) error {
}
vc.moClient = client
vc.vimDriver = client.Client
vc.pc = property.DefaultCollector(vc.vimDriver)
vc.finder = find.NewFinder(vc.vimDriver)
vc.pm = performance.NewManager(vc.vimDriver)
vc.vm = view.NewManager(vc.vimDriver)
vsanDriver, err := vsan.NewClient(ctx, vc.vimDriver)
if err != nil {
vc.logger.Info(fmt.Errorf("could not create VSAN client: %w", err).Error())
} else {
vc.vsanDriver = vsanDriver
}
return nil
}

Expand Down Expand Up @@ -320,3 +335,240 @@ func (vc *vcenterClient) PerfMetricsQuery(
resultsByRef: resultsByRef,
}, nil
}

// VSANQueryResults contains all returned vSAN metric related data.
// It is the result type returned by the vSAN query methods on vcenterClient.
type VSANQueryResults struct {
// MetricResultsByUUID holds the vSAN metric data for each resource, keyed by
// that resource's UUID string.
MetricResultsByUUID map[string]*VSANMetricResults
}

// VSANMetricResults contains vSAN metric related data for a single resource.
type VSANMetricResults struct {
// UUID identifies the resource these metrics belong to. It is the portion of
// the vSAN EntityRefId that follows the first ':'.
UUID string
// MetricDetails holds one entry per metric series returned for the resource.
MetricDetails []*VSANMetricDetails
}

// VSANMetricDetails contains vSAN metric data for a single metric.
// Timestamps and Values are parallel slices: Values[i] is the sample taken at
// Timestamps[i].
type VSANMetricDetails struct {
// MetricLabel is the vSAN metric label (for example "iopsRead").
MetricLabel string
// Interval is the metric collection interval in seconds.
Interval int32
// Timestamps holds the sample time for each entry in Values.
Timestamps []*time.Time
// Values holds every sampled value for the vSAN metric label.
Values []int64
}

// vSANQueryType represents the type of vSAN query. Its string value is used
// directly as the EntityRefId of the vSAN performance query spec.
type vSANQueryType string

const (
// VSANQueryTypeVirtualMachines selects vSAN performance data for virtual
// machines. NOTE(review): the trailing "*" presumably acts as a wildcard
// matching every virtual machine — confirm against the vSAN API.
VSANQueryTypeVirtualMachines vSANQueryType = "virtual-machine:*"
)

// getLabelsForQueryType returns the vSAN metric labels that should be
// requested for the given query type. Query types without a known label set
// yield an empty, non-nil slice.
func (vc *vcenterClient) getLabelsForQueryType(queryType vSANQueryType) []string {
	if queryType == VSANQueryTypeVirtualMachines {
		return []string{
			"iopsRead",
			"iopsWrite",
			"throughputRead",
			"throughputWrite",
			"latencyRead",
			"latencyWrite",
		}
	}

	return []string{}
}

// VSANVirtualMachines returns virtual machine vSAN performance metrics for the
// given clusters.
//
// Any error produced by the query is passed through handleVSANError before
// being returned, so the returned error may be nil even though the query
// itself reported a problem.
func (vc *vcenterClient) VSANVirtualMachines(
	ctx context.Context,
	clusterRefs []*vt.ManagedObjectReference,
) (*VSANQueryResults, error) {
	const queryType = VSANQueryTypeVirtualMachines

	vmResults, queryErr := vc.vSANQuery(ctx, queryType, clusterRefs)
	return vmResults, vc.handleVSANError(queryErr, queryType)
}

// vSANQuery performs a vSAN query of the specified type against every cluster
// in clusterRefs and merges the per-cluster results into a single result set
// keyed by UUID.
//
// The clusters are queried in order. On the first failing cluster the results
// merged so far are returned together with that cluster's error, and the
// remaining clusters are not queried.
func (vc *vcenterClient) vSANQuery(
	ctx context.Context,
	queryType vSANQueryType,
	clusterRefs []*vt.ManagedObjectReference,
) (*VSANQueryResults, error) {
	merged := &VSANQueryResults{
		MetricResultsByUUID: make(map[string]*VSANMetricResults),
	}

	for i := range clusterRefs {
		clusterResults, err := vc.vSANQueryByCluster(ctx, queryType, clusterRefs[i])
		if err != nil {
			return merged, err
		}

		// Later clusters overwrite earlier entries that share a UUID.
		maps.Copy(merged.MetricResultsByUUID, clusterResults.MetricResultsByUUID)
	}

	return merged, nil
}

// vSANQueryByCluster performs a vSAN query of the specified type for a single
// cluster and converts each returned entity into a VSANMetricResults entry
// keyed by UUID.
//
// When no vSAN client is available an empty result is returned without error.
// A failure of the query itself returns a nil result and a wrapped error. A
// failure while converting an entity returns the entries converted so far
// together with a wrapped error.
func (vc *vcenterClient) vSANQueryByCluster(
	ctx context.Context,
	queryType vSANQueryType,
	clusterRef *vt.ManagedObjectReference,
) (*VSANQueryResults, error) {
	byUUID := map[string]*VSANMetricResults{}
	queryResults := &VSANQueryResults{MetricResultsByUUID: byUUID}

	// Not all vCenters support vSAN so just return an empty result
	if vc.vsanDriver == nil {
		return queryResults, nil
	}

	// The same instant is used for both ends of the query window.
	queryTime := time.Now()
	spec := types.VsanPerfQuerySpec{
		EntityRefId: string(queryType),
		StartTime:   &queryTime,
		EndTime:     &queryTime,
		Labels:      vc.getLabelsForQueryType(queryType),
	}

	rawResults, err := vc.vsanDriver.VsanPerfQueryPerf(ctx, clusterRef, []types.VsanPerfQuerySpec{spec})
	if err != nil {
		return nil, fmt.Errorf("problem retrieving %s vSAN metrics for cluster %s: %w", queryType, clusterRef.Value, err)
	}

	for _, rawResult := range rawResults {
		metricResults, convErr := vc.convertVSANResultToMetricResults(rawResult)
		switch {
		case convErr == nil:
			byUUID[metricResults.UUID] = metricResults
		case metricResults != nil:
			// The UUID was parsed before the failure, so include it for context.
			return queryResults, fmt.Errorf("problem processing %s [%s] vSAN metrics for cluster %s: %w", queryType, metricResults.UUID, clusterRef.Value, convErr)
		default:
			return queryResults, fmt.Errorf("problem processing %s vSAN metrics for cluster %s: %w", queryType, clusterRef.Value, convErr)
		}
	}

	return queryResults, nil
}

// handleVSANError filters errors returned by vSAN queries, suppressing the
// SOAP faults that only indicate vSAN data is unavailable.
//
// The error chain is walked from err downwards looking for a SOAP fault. If
// none is found, err is returned unchanged (including a nil err). If a fault
// is found it is classified by the type name of its detail, falling back to
// the fault string when no detail is present:
//   - "NotSupported" and "NotFound" are logged at debug level and nil is returned.
//   - any other fault results in err being returned unchanged.
func (vc *vcenterClient) handleVSANError(
	err error,
	queryType vSANQueryType,
) error {
	// Callers wrap the SOAP fault with additional context, possibly more than
	// once, so inspect every layer of the chain rather than only the first.
	var faultErr error
	for cause := err; cause != nil; cause = errors.Unwrap(cause) {
		if soap.IsSoapFault(cause) {
			faultErr = cause
			break
		}
	}
	if faultErr == nil {
		return err
	}

	fault := soap.ToSoapFault(faultErr)
	msg := fault.String

	if detail := fault.Detail.Fault; detail != nil {
		detailType := reflect.TypeOf(detail)
		// Type.Name is empty for pointer types, so resolve to the element type first.
		if detailType.Kind() == reflect.Pointer {
			detailType = detailType.Elem()
		}
		msg = detailType.Name()
	}

	switch msg {
	case "NotSupported":
		vc.logger.Debug(fmt.Sprintf("%s vSAN metrics not supported: %s", queryType, err.Error()))
		return nil
	case "NotFound":
		vc.logger.Debug(fmt.Sprintf("no %s vSAN metrics found: %s", queryType, err.Error()))
		return nil
	default:
		return err
	}
}

// convertVSANResultToMetricResults converts one raw vSAN CSV entity result
// into a VSANMetricResults.
//
// The UUID is taken from the entity's EntityRefId, the comma separated
// SampleInfo is parsed into timestamps, and every metric series in Value is
// converted against those timestamps.
//
// A nil result is returned when the UUID cannot be parsed. Any later failure
// returns the partially populated result (UUID set, metrics converted so far)
// together with the error.
func (vc *vcenterClient) convertVSANResultToMetricResults(vSANResult types.VsanPerfEntityMetricCSV) (*VSANMetricResults, error) {
	uuid, err := vc.uuidFromEntityRefID(vSANResult.EntityRefId)
	if err != nil {
		return nil, err
	}

	metricResults := &VSANMetricResults{
		UUID:          uuid,
		MetricDetails: []*VSANMetricDetails{},
	}

	// Parse all timestamps
	//
	// Assuming the collector is making the request in the same time zone as the localized response
	// from the vSAN API. Not a great assumption, but otherwise it will almost definitely be wrong
	// if we assume that it is UTC. There is precedent for this method at least.
	//
	// NOTE: strings.Split returns a single empty element for an empty
	// SampleInfo, which then fails to parse as a timestamp.
	zoneName, _ := time.Now().Local().Zone()
	sampleTimes := strings.Split(vSANResult.SampleInfo, ",")
	timestamps := make([]time.Time, 0, len(sampleTimes))
	for _, sampleTime := range sampleTimes {
		parsed, parseErr := time.Parse("2006-01-02 15:04:05 MST", sampleTime+" "+zoneName)
		if parseErr != nil {
			return metricResults, fmt.Errorf("problem parsing timestamp from %s: %w", sampleTime, parseErr)
		}
		timestamps = append(timestamps, parsed)
	}

	// Parse all metrics
	for _, series := range vSANResult.Value {
		details, convErr := vc.convertVSANValueToMetricDetails(series, timestamps)
		if convErr != nil {
			return metricResults, convErr
		}
		metricResults.MetricDetails = append(metricResults.MetricDetails, details)
	}

	return metricResults, nil
}

// convertVSANValueToMetricDetails converts one raw vSAN CSV metric series
// into a VSANMetricDetails, pairing each comma separated value with the
// timestamp at the same index.
//
// If the series reports a collection interval of 0, a warning is logged and
// an interval of 300 seconds (5m) is assumed.
//
// A nil result and an error are returned when the number of values differs
// from the number of timestamps, or when a value cannot be parsed as a base-10
// 64-bit integer. The parse error is wrapped so the underlying cause is kept.
func (vc *vcenterClient) convertVSANValueToMetricDetails(
	vSANValue types.VsanPerfMetricSeriesCSV,
	timestamps []time.Time,
) (*VSANMetricDetails, error) {
	metricLabel := vSANValue.MetricId.Label
	metricInterval := vSANValue.MetricId.MetricsCollectInterval
	// If not found assume the interval is 5m
	if metricInterval == 0 {
		vc.logger.Warn(fmt.Sprintf("no interval found for vSAN metric [%s] so assuming 5m", metricLabel))
		metricInterval = 300
	}

	valueStrings := strings.Split(vSANValue.Values, ",")
	if len(valueStrings) != len(timestamps) {
		return nil, fmt.Errorf("number of timestamps [%d] doesn't match number of values [%d] for metric %s", len(timestamps), len(valueStrings), metricLabel)
	}

	// The final length is known, so size both slices once up front.
	metricDetails := VSANMetricDetails{
		MetricLabel: metricLabel,
		Interval:    metricInterval,
		Timestamps:  make([]*time.Time, 0, len(valueStrings)),
		Values:      make([]int64, 0, len(valueStrings)),
	}

	// Match up timestamps with metric values
	for i, valueString := range valueStrings {
		value, err := strconv.ParseInt(valueString, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("problem converting value [%s] for metric %s: %w", valueString, metricLabel, err)
		}

		// Each entry points at the caller's timestamps slice rather than a copy.
		metricDetails.Timestamps = append(metricDetails.Timestamps, &timestamps[i])
		metricDetails.Values = append(metricDetails.Values, value)
	}

	return &metricDetails, nil
}

// uuidFromEntityRefID returns the UUID portion of the EntityRefId, which is
// everything after the first ':'. An error is returned when id contains no
// ':' at all.
func (vc *vcenterClient) uuidFromEntityRefID(id string) (string, error) {
	_, uuid, found := strings.Cut(id, ":")
	if !found {
		return "", fmt.Errorf("no ':' found in EntityRefId [%s] to parse UUID", id)
	}

	return uuid, nil
}
Loading

0 comments on commit 51f4a89

Please sign in to comment.