Skip to content
This repository has been archived by the owner on Dec 1, 2018. It is now read-only.

Commit

Permalink
add disk io metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
andyxning committed Dec 6, 2017
1 parent 6c245e7 commit e5f1618
Show file tree
Hide file tree
Showing 4 changed files with 183 additions and 24 deletions.
6 changes: 5 additions & 1 deletion docs/storage-schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Heapster exports the following metrics to its backends.
| filesystem/available | The number of available bytes remaining in a the filesystem |
| filesystem/inodes | The number of available inodes in a the filesystem |
| filesystem/inodes_free | The number of free inodes remaining in a the filesystem |
| disk/io_read_bytes | Number of bytes read from a disk partition |
| disk/io_write_bytes | Number of bytes written to a disk partition |
| disk/io_read_bytes_rate | Number of bytes read from a disk partition per second |
| disk/io_write_bytes_rate | Number of bytes written to a disk partition per second |
| memory/limit | Memory hard limit in bytes. |
| memory/major_page_faults | Number of major page faults. |
| memory/major_page_faults_rate | Number of major page faults per second. |
Expand Down Expand Up @@ -62,7 +66,7 @@ Heapster tags each metric with the following labels.
| labels | Comma-separated(Default) list of user-provided labels. Format is 'key:value' |
| namespace_id | UID of the namespace of a Pod |
| namespace_name | User-provided name of a Namespace |
| resource_id | A unique identifier used to differentiate multiple metrics of the same type. e.x. Fs partitions under filesystem/usage |
| resource_id | A unique identifier used to differentiate multiple metrics of the same type. e.x. Fs partitions under filesystem/usage, disk device name under disk/io_read_bytes |
| make | Make of the accelerator (nvidia, amd, google etc.) |
| model | Model of the accelerator (tesla-p100, tesla-k80 etc.) |
| accelerator_id | ID of the accelerator |
Expand Down
6 changes: 5 additions & 1 deletion integration/heapster_api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,11 @@ func runMetricExportTest(fm kubeFramework, svc *kube_v1.Service) error {
core.MetricFilesystemInodesFree.Name: {core.LabelResourceID.Key},
core.MetricAcceleratorMemoryTotal.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key},
core.MetricAcceleratorMemoryUsed.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key},
core.MetricAcceleratorDutyCycle.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key}}
core.MetricAcceleratorDutyCycle.Name: {core.LabelAcceleratorMake.Key, core.LabelAcceleratorModel.Key, core.LabelAcceleratorID.Key},
core.MetricDiskIORead.Name: {core.LabelResourceID.Key},
core.MetricDiskIOReadRate.Name: {core.LabelResourceID.Key},
core.MetricDiskIOWrite.Name: {core.LabelResourceID.Key},
core.MetricDiskIOWriteRate.Name: {core.LabelResourceID.Key}}

for metricName, points := range ts.Metrics {
md, exists := mdMap[metricName]
Expand Down
117 changes: 115 additions & 2 deletions metrics/core/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package core

import (
"fmt"
"time"

cadvisor "github.com/google/cadvisor/info/v1"
Expand Down Expand Up @@ -54,7 +55,9 @@ var RateMetrics = []Metric{
MetricNetworkRxRate,
MetricNetworkRxErrorsRate,
MetricNetworkTxRate,
MetricNetworkTxErrorsRate}
MetricNetworkTxErrorsRate,
MetricDiskIOReadRate,
MetricDiskIOWriteRate}

var RateMetricsMapping = map[string]Metric{
MetricCpuUsage.MetricDescriptor.Name: MetricCpuUsageRate,
Expand All @@ -63,9 +66,15 @@ var RateMetricsMapping = map[string]Metric{
MetricNetworkRx.MetricDescriptor.Name: MetricNetworkRxRate,
MetricNetworkRxErrors.MetricDescriptor.Name: MetricNetworkRxErrorsRate,
MetricNetworkTx.MetricDescriptor.Name: MetricNetworkTxRate,
MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate}
MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate,
MetricDiskIORead.MetricDescriptor.Name: MetricDiskIOReadRate,
MetricDiskIOWrite.MetricDescriptor.Name: MetricDiskIOWriteRate}

var LabeledMetrics = []Metric{
MetricDiskIORead,
MetricDiskIOReadRate,
MetricDiskIOWrite,
MetricDiskIOWriteRate,
MetricFilesystemUsage,
MetricFilesystemLimit,
MetricFilesystemAvailable,
Expand Down Expand Up @@ -885,6 +894,110 @@ var MetricAcceleratorDutyCycle = Metric{
},
}

var MetricDiskIORead = Metric{
MetricDescriptor: MetricDescriptor{
Name: "disk/io_read_bytes",
Description: "Cumulative number of bytes read over disk",
Type: MetricCumulative,
ValueType: ValueInt64,
Units: UnitsBytes,
Labels: metricLabels,
},
HasLabeledMetric: func(spec *cadvisor.ContainerSpec) bool {
return spec.HasDiskIo
},
GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric {
result := make([]LabeledMetric, 0, len(stat.DiskIo.IoServiceBytes))
for _, ioServiceBytesPerPartition := range stat.DiskIo.IoServiceBytes {
resourceIDKey := ioServiceBytesPerPartition.Device
if resourceIDKey == "" {
resourceIDKey = fmt.Sprintf("%d:%d", ioServiceBytesPerPartition.Major, ioServiceBytesPerPartition.Minor)
}

var value uint64
if v, exists := ioServiceBytesPerPartition.Stats["Read"]; exists {
value = v
}

result = append(result, LabeledMetric{
Name: "disk/io_read_bytes",
Labels: map[string]string{
LabelResourceID.Key: resourceIDKey,
},
MetricValue: MetricValue{
ValueType: ValueInt64,
MetricType: MetricGauge,
IntValue: int64(value),
},
})
}
return result
},
}

var MetricDiskIOWrite = Metric{
MetricDescriptor: MetricDescriptor{
Name: "disk/io_write_bytes",
Description: "Cumulative number of bytes write over disk",
Type: MetricCumulative,
ValueType: ValueInt64,
Units: UnitsBytes,
Labels: metricLabels,
},
HasLabeledMetric: func(spec *cadvisor.ContainerSpec) bool {
return spec.HasDiskIo
},
GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric {
result := make([]LabeledMetric, 0, len(stat.DiskIo.IoServiceBytes))
for _, ioServiceBytesPerPartition := range stat.DiskIo.IoServiceBytes {
resourceIDKey := ioServiceBytesPerPartition.Device
if resourceIDKey == "" {
resourceIDKey = fmt.Sprintf("%d:%d", ioServiceBytesPerPartition.Major, ioServiceBytesPerPartition.Minor)
}

var value uint64
if v, exists := ioServiceBytesPerPartition.Stats["Write"]; exists {
value = v
}

result = append(result, LabeledMetric{
Name: "disk/io_write_bytes",
Labels: map[string]string{
LabelResourceID.Key: resourceIDKey,
},
MetricValue: MetricValue{
ValueType: ValueInt64,
MetricType: MetricGauge,
IntValue: int64(value),
},
})
}
return result
},
}

var MetricDiskIOReadRate = Metric{
MetricDescriptor: MetricDescriptor{
Name: "disk/io_read_bytes_rate",
Description: "Rate of bytes read over disk in bytes per second",
Type: MetricGauge,
ValueType: ValueFloat,
Units: UnitsCount,
Labels: metricLabels,
},
}

var MetricDiskIOWriteRate = Metric{
MetricDescriptor: MetricDescriptor{
Name: "disk/io_write_bytes_rate",
Description: "Rate of bytes written over disk in bytes per second",
Type: MetricGauge,
ValueType: ValueFloat,
Units: UnitsCount,
Labels: metricLabels,
},
}

func IsNodeAutoscalingMetric(name string) bool {
for _, autoscalingMetric := range NodeAutoscalingMetrics {
if autoscalingMetric.MetricDescriptor.Name == name {
Expand Down
78 changes: 58 additions & 20 deletions metrics/processors/rate_calculator.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,31 +57,69 @@ func (this *RateCalculator) Process(batch *core.DataBatch) (*core.DataBatch, err
continue
}

var metricValNew, metricValOld core.MetricValue
var foundNew, foundOld bool

for metricName, targetMetric := range this.rateMetricsMapping {
metricValNew, foundNew := newMs.MetricValues[metricName]
metricValOld, foundOld := oldMs.MetricValues[metricName]
if foundNew && foundOld && metricName == core.MetricCpuUsage.MetricDescriptor.Name {
// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) /
(newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano())

newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueInt64,
MetricType: core.MetricGauge,
IntValue: newVal,
if metricName == core.MetricDiskIORead.MetricDescriptor.Name || metricName == core.MetricDiskIOWrite.MetricDescriptor.Name {
for _, itemNew := range newMs.LabeledMetrics {
foundNew, foundOld = false, false
if itemNew.Name == metricName {
metricValNew, foundNew = itemNew.MetricValue, true
for _, itemOld := range oldMs.LabeledMetrics {
if itemOld.Name == metricName {
metricValOld, foundOld = itemOld.MetricValue, true
break
}
}
}

if foundNew && foundOld {
if targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())

newMs.LabeledMetrics = append(newMs.LabeledMetrics, core.LabeledMetric{
Name: targetMetric.MetricDescriptor.Name,
Labels: itemNew.Labels,
MetricValue: core.MetricValue{
ValueType: core.ValueFloat,
MetricType: core.MetricGauge,
FloatValue: newVal,
},
})
}
} else if foundNew && !foundOld || !foundNew && foundOld {
glog.V(4).Infof("Skipping rates for %s in %s: metric not found in one of old (%v) or new (%v)", metricName, key, foundOld, foundNew)
}
}
} else {
metricValNew, foundNew = newMs.MetricValues[metricName]
metricValOld, foundOld = oldMs.MetricValues[metricName]

if foundNew && foundOld && metricName == core.MetricCpuUsage.MetricDescriptor.Name {
// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) /
(newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano())

newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueInt64,
MetricType: core.MetricGauge,
IntValue: newVal,
}

} else if foundNew && foundOld && targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())
} else if foundNew && foundOld && targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())

newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueFloat,
MetricType: core.MetricGauge,
FloatValue: newVal,
newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueFloat,
MetricType: core.MetricGauge,
FloatValue: newVal,
}
} else if foundNew && !foundOld || !foundNew && foundOld {
glog.V(4).Infof("Skipping rates for %s in %s: metric not found in one of old (%v) or new (%v)", metricName, key, foundOld, foundNew)
}
} else if foundNew && !foundOld || !foundNew && foundOld {
glog.V(4).Infof("Skipping rates for %s in %s: metric not found in one of old (%v) or new (%v)", metricName, key, foundOld, foundNew)
}
}
}
Expand Down

0 comments on commit e5f1618

Please sign in to comment.