Skip to content
This repository has been archived by the owner on Dec 1, 2018. It is now read-only.

Commit

Permalink
add disk io metric
Browse files Browse the repository at this point in the history
  • Loading branch information
andyxning committed Feb 21, 2017
2 parents 0ae7708 + 9403449 commit c3f0a98
Show file tree
Hide file tree
Showing 3 changed files with 183 additions and 22 deletions.
6 changes: 5 additions & 1 deletion docs/storage-schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ Heapster exports the following metrics to its backends.
| filesystem/usage | Total number of bytes consumed on a filesystem. |
| filesystem/limit | The total size of filesystem in bytes. |
| filesystem/available | The number of available bytes remaining in the filesystem |
| disk/io_read_bytes | Number of bytes read from a disk partition |
| disk/io_write_bytes | Number of bytes written to a disk partition |
| disk/io_read_bytes_rate | Number of bytes read from a disk partition per second |
| disk/io_write_bytes_rate | Number of bytes written to a disk partition per second |
| memory/limit | Memory hard limit in bytes. |
| memory/major_page_faults | Number of major page faults. |
| memory/major_page_faults_rate | Number of major page faults per second. |
Expand Down Expand Up @@ -54,7 +58,7 @@ Heapster tags each metric with the following labels.
| hostname | Hostname where the container ran |
| labels | Comma-separated(Default) list of user-provided labels. Format is 'key:value' |
| namespace_id | UID of the namespace of a Pod |
| resource_id | A unique identifier used to differentiate multiple metrics of the same type. e.x. Fs partitions under filesystem/usage |
| resource_id | A unique identifier used to differentiate multiple metrics of the same type, e.g. Fs partitions under filesystem/usage, disk major:minor number under disk/io_read_bytes |

**Note**
* Label separator can be configured with Heapster `--label-seperator`. Comma-separated label pairs are fine until we use [Bosun](http://bosun.org) as the alert system and use `group by labels` to search for labels.
Expand Down
120 changes: 118 additions & 2 deletions metrics/core/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package core
import (
"time"

"fmt"
cadvisor "github.com/google/cadvisor/info/v1"
)

Expand Down Expand Up @@ -52,7 +53,9 @@ var RateMetrics = []Metric{
MetricNetworkRxRate,
MetricNetworkRxErrorsRate,
MetricNetworkTxRate,
MetricNetworkTxErrorsRate}
MetricNetworkTxErrorsRate,
MetricDiskIOReadRate,
MetricDiskIOWriteRate}

var RateMetricsMapping = map[string]Metric{
MetricCpuUsage.MetricDescriptor.Name: MetricCpuUsageRate,
Expand All @@ -61,12 +64,21 @@ var RateMetricsMapping = map[string]Metric{
MetricNetworkRx.MetricDescriptor.Name: MetricNetworkRxRate,
MetricNetworkRxErrors.MetricDescriptor.Name: MetricNetworkRxErrorsRate,
MetricNetworkTx.MetricDescriptor.Name: MetricNetworkTxRate,
MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate}
MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate,
MetricDiskIORead.MetricDescriptor.Name: MetricDiskIOReadRate,
MetricDiskIOWrite.MetricDescriptor.Name: MetricDiskIOWriteRate}

// LabeledMetrics lists the metrics that are emitted as labeled values
// (one value per label set) instead of a single value per metric name.
var LabeledMetrics = []Metric{
// Caution: rate calculation needs to traverse the labeled metrics to find the
// disk io read and write entries. To keep that lookup fast, the disk io read
// and write labeled metrics are kept at the head of this list.
// NOTE(review): this assumes the order of this list carries over to the
// order of the labeled metrics in a metric set — confirm against the source
// that builds the metric sets.
MetricDiskIORead,
MetricDiskIOWrite,
MetricFilesystemUsage,
MetricFilesystemLimit,
MetricFilesystemAvailable,
MetricDiskIOReadRate,
MetricDiskIOWriteRate,
}

var NodeAutoscalingMetrics = []Metric{
Expand Down Expand Up @@ -609,6 +621,110 @@ var MetricFilesystemAvailable = Metric{
},
}

// MetricDiskIORead describes the "disk/io_read_bytes" labeled metric. For every
// entry of the container's DiskIo.IoServiceBytes stats it reports the "Read"
// byte counter, labeled with the device's "major:minor" number as resource id.
//
// NOTE(review): the descriptor declares MetricCumulative while every emitted
// value is tagged MetricGauge — confirm which of the two sinks should rely on.
var MetricDiskIORead = Metric{
	MetricDescriptor: MetricDescriptor{
		Name:        "disk/io_read_bytes",
		Description: "Cumulative number of bytes read over disk",
		Type:        MetricCumulative,
		ValueType:   ValueInt64,
		Units:       UnitsBytes,
		Labels:      metricLabels,
	},
	// The metric is only produced when the container spec reports disk IO stats.
	HasLabeledMetric: func(spec *cadvisor.ContainerSpec) bool {
		return spec.HasDiskIo
	},
	// Returns one LabeledMetric per IoServiceBytes entry, in the same order.
	GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric {
		perDevice := stat.DiskIo.IoServiceBytes
		metrics := make([]LabeledMetric, len(perDevice))
		for i, dev := range perDevice {
			// A missing "Read" key yields the map's zero value, i.e. 0 bytes.
			readBytes := dev.Stats["Read"]

			metrics[i] = LabeledMetric{
				Name: "disk/io_read_bytes",
				Labels: map[string]string{
					LabelResourceID.Key: fmt.Sprintf("%v:%v", dev.Major, dev.Minor),
				},
				MetricValue: MetricValue{
					ValueType:  ValueInt64,
					MetricType: MetricGauge,
					IntValue:   int64(readBytes),
				},
			}
		}
		return metrics
	},
}

// MetricDiskIOWrite describes the "disk/io_write_bytes" labeled metric. For
// every entry of the container's DiskIo.IoServiceBytes stats it reports the
// "Write" byte counter, labeled with the device's "major:minor" number as
// resource id.
//
// NOTE(review): the descriptor declares MetricCumulative while every emitted
// value is tagged MetricGauge — confirm which of the two sinks should rely on.
var MetricDiskIOWrite = Metric{
	MetricDescriptor: MetricDescriptor{
		Name:        "disk/io_write_bytes",
		Description: "Cumulative number of bytes write over disk",
		Type:        MetricCumulative,
		ValueType:   ValueInt64,
		Units:       UnitsBytes,
		Labels:      metricLabels,
	},
	// The metric is only produced when the container spec reports disk IO stats.
	HasLabeledMetric: func(spec *cadvisor.ContainerSpec) bool {
		return spec.HasDiskIo
	},
	// Returns one LabeledMetric per IoServiceBytes entry, in the same order.
	GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric {
		perDevice := stat.DiskIo.IoServiceBytes
		metrics := make([]LabeledMetric, len(perDevice))
		for i, dev := range perDevice {
			// A missing "Write" key yields the map's zero value, i.e. 0 bytes.
			writtenBytes := dev.Stats["Write"]

			metrics[i] = LabeledMetric{
				Name: "disk/io_write_bytes",
				Labels: map[string]string{
					LabelResourceID.Key: fmt.Sprintf("%v:%v", dev.Major, dev.Minor),
				},
				MetricValue: MetricValue{
					ValueType:  ValueInt64,
					MetricType: MetricGauge,
					IntValue:   int64(writtenBytes),
				},
			}
		}
		return metrics
	},
}

// MetricDiskIOReadRate describes the "disk/io_read_bytes_rate" metric, a float
// gauge. Only the descriptor is declared here; no HasLabeledMetric or
// GetLabeledMetric callback is set in this literal.
// NOTE(review): Units is UnitsCount although the description says bytes per
// second — confirm this is intended.
var MetricDiskIOReadRate = Metric{
MetricDescriptor: MetricDescriptor{
Name: "disk/io_read_bytes_rate",
Description: "Rate of bytes read over disk in bytes per second",
Type: MetricGauge,
ValueType: ValueFloat,
Units: UnitsCount,
Labels: metricLabels,
},
}

// MetricDiskIOWriteRate describes the "disk/io_write_bytes_rate" metric, a
// float gauge. Only the descriptor is declared here; no HasLabeledMetric or
// GetLabeledMetric callback is set in this literal.
// NOTE(review): Units is UnitsCount although the description says bytes per
// second — confirm this is intended.
var MetricDiskIOWriteRate = Metric{
MetricDescriptor: MetricDescriptor{
Name: "disk/io_write_bytes_rate",
Description: "Rate of bytes written over disk in bytes per second",
Type: MetricGauge,
ValueType: ValueFloat,
Units: UnitsCount,
Labels: metricLabels,
},
}

func IsNodeAutoscalingMetric(name string) bool {
for _, autoscalingMetric := range NodeAutoscalingMetrics {
if autoscalingMetric.MetricDescriptor.Name == name {
Expand Down
79 changes: 60 additions & 19 deletions metrics/processors/rate_calculator.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
package processors

import (
"reflect"

"k8s.io/heapster/metrics/core"

"github.com/golang/glog"
Expand Down Expand Up @@ -53,29 +55,68 @@ func (this *RateCalculator) Process(batch *core.DataBatch) (*core.DataBatch, err
continue
}

var metricValNew, metricValOld core.MetricValue
var foundNew, foundOld bool

for metricName, targetMetric := range this.rateMetricsMapping {
metricValNew, foundNew := newMs.MetricValues[metricName]
metricValOld, foundOld := oldMs.MetricValues[metricName]
if foundNew && foundOld {
if metricName == core.MetricCpuUsage.MetricDescriptor.Name {
// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) /
(newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano())

newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueInt64,
MetricType: core.MetricGauge,
IntValue: newVal,

if metricName == core.MetricDiskIORead.MetricDescriptor.Name || metricName == core.MetricDiskIOWrite.MetricDescriptor.Name {
for _, itemNew := range newMs.LabeledMetrics {
foundNew, foundOld = false, false

if itemNew.Name == metricName {
metricValNew, foundNew = itemNew.MetricValue, true

for _, itemOld := range oldMs.LabeledMetrics {
if itemOld.Name == metricName && reflect.DeepEqual(itemOld.Labels, itemNew.Labels) {
metricValOld, foundOld = itemOld.MetricValue, true
break
}
}
}

} else if targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())
if foundNew && foundOld {
if targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())

newMs.LabeledMetrics = append(newMs.LabeledMetrics, core.LabeledMetric{
Name: targetMetric.MetricDescriptor.Name,
Labels: itemNew.Labels,
MetricValue: core.MetricValue{
ValueType: core.ValueFloat,
MetricType: core.MetricGauge,
FloatValue: newVal,
},
})
}
}
}
} else {
metricValNew, foundNew = newMs.MetricValues[metricName]
metricValOld, foundOld = oldMs.MetricValues[metricName]

if foundNew && foundOld {
if metricName == core.MetricCpuUsage.MetricDescriptor.Name {
// cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here).
newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) /
(newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano())

newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueInt64,
MetricType: core.MetricGauge,
IntValue: newVal,
}

} else if targetMetric.MetricDescriptor.ValueType == core.ValueFloat {
newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) /
float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano())

newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueFloat,
MetricType: core.MetricGauge,
FloatValue: newVal,
newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{
ValueType: core.ValueFloat,
MetricType: core.MetricGauge,
FloatValue: newVal,
}
}
}
}
Expand Down

0 comments on commit c3f0a98

Please sign in to comment.