From 9403449f10374137168d1e0c5b23a9bad034a9c0 Mon Sep 17 00:00:00 2001 From: Andy Xie Date: Mon, 9 Jan 2017 21:00:10 +0800 Subject: [PATCH] add disk io metric --- docs/storage-schema.md | 6 +- metrics/core/metrics.go | 120 +++++++++++++++++++++++++- metrics/processors/rate_calculator.go | 79 +++++++++++++---- 3 files changed, 183 insertions(+), 22 deletions(-) diff --git a/docs/storage-schema.md b/docs/storage-schema.md index 2472a6ad60..6b90f6105c 100644 --- a/docs/storage-schema.md +++ b/docs/storage-schema.md @@ -15,6 +15,10 @@ Heapster exports the following metrics to its backends. | filesystem/usage | Total number of bytes consumed on a filesystem. | | filesystem/limit | The total size of filesystem in bytes. | | filesystem/available | The number of available bytes remaining in a the filesystem | +| disk/io_read_bytes | Cumulative number of bytes read from a disk partition | +| disk/io_write_bytes | Cumulative number of bytes written to a disk partition | +| disk/io_read_bytes_rate | Number of bytes read from a disk partition per second | +| disk/io_write_bytes_rate | Number of bytes written to a disk partition per second | | memory/limit | Memory hard limit in bytes. | | memory/major_page_faults | Number of major page faults. | | memory/major_page_faults_rate | Number of major page faults per second. | @@ -54,7 +58,7 @@ Heapster tags each metric with the following labels. | hostname | Hostname where the container ran | | labels | Comma-separated(Default) list of user-provided labels. Format is 'key:value' | | namespace_id | UID of the namespace of a Pod | -| resource_id | An unique identifier used to differentiate multiple metrics of the same type. e.x. Fs partitions under filesystem/usage | +| resource_id | A unique identifier used to differentiate multiple metrics of the same type, e.g. Fs partitions under filesystem/usage, or the disk major:minor number under disk/io_read_bytes | **Note** * Label separator can be configured with Heapster `--label-seperator`. 
Comma-seperated label pairs is fine until we use [Bosun](http://bosun.org) as alert system and use `group by labels` to search for labels. diff --git a/metrics/core/metrics.go b/metrics/core/metrics.go index 2113997660..dc080317d3 100644 --- a/metrics/core/metrics.go +++ b/metrics/core/metrics.go @@ -17,6 +17,7 @@ package core import ( + "fmt" "time" cadvisor "github.com/google/cadvisor/info/v1" ) @@ -52,7 +53,9 @@ var RateMetrics = []Metric{ MetricNetworkRxRate, MetricNetworkRxErrorsRate, MetricNetworkTxRate, - MetricNetworkTxErrorsRate} + MetricNetworkTxErrorsRate, + MetricDiskIOReadRate, + MetricDiskIOWriteRate} var RateMetricsMapping = map[string]Metric{ MetricCpuUsage.MetricDescriptor.Name: MetricCpuUsageRate, @@ -61,12 +64,21 @@ var RateMetricsMapping = map[string]Metric{ MetricNetworkRx.MetricDescriptor.Name: MetricNetworkRxRate, MetricNetworkRxErrors.MetricDescriptor.Name: MetricNetworkRxErrorsRate, MetricNetworkTx.MetricDescriptor.Name: MetricNetworkTxRate, - MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate} + MetricNetworkTxErrors.MetricDescriptor.Name: MetricNetworkTxErrorsRate, + MetricDiskIORead.MetricDescriptor.Name: MetricDiskIOReadRate, + MetricDiskIOWrite.MetricDescriptor.Name: MetricDiskIOWriteRate} var LabeledMetrics = []Metric{ + // Caution: the rate calculator scans the labeled metrics linearly to find the disk io read + // and write samples. To keep that scan short, the disk io read and write labeled metrics + // must stay at the head of this list. 
+ MetricDiskIORead, + MetricDiskIOWrite, MetricFilesystemUsage, MetricFilesystemLimit, MetricFilesystemAvailable, + MetricDiskIOReadRate, + MetricDiskIOWriteRate, } var NodeAutoscalingMetrics = []Metric{ @@ -609,6 +621,110 @@ var MetricFilesystemAvailable = Metric{ }, } +var MetricDiskIORead = Metric{ + MetricDescriptor: MetricDescriptor{ + Name: "disk/io_read_bytes", + Description: "Cumulative number of bytes read over disk", + Type: MetricCumulative, + ValueType: ValueInt64, + Units: UnitsBytes, + Labels: metricLabels, + }, + HasLabeledMetric: func(spec *cadvisor.ContainerSpec) bool { + return spec.HasDiskIo + }, + GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric { + result := make([]LabeledMetric, 0, len(stat.DiskIo.IoServiceBytes)) + for _, ioServiceBytesPerPartition := range stat.DiskIo.IoServiceBytes { + resourceIDKey := fmt.Sprintf( + "%v:%v", + ioServiceBytesPerPartition.Major, + ioServiceBytesPerPartition.Minor, + ) + var value uint64 + if v, exists := ioServiceBytesPerPartition.Stats["Read"]; exists { + value = v + } + + result = append(result, LabeledMetric{ + Name: "disk/io_read_bytes", + Labels: map[string]string{ + LabelResourceID.Key: resourceIDKey, + }, + MetricValue: MetricValue{ + ValueType: ValueInt64, + MetricType: MetricGauge, // NOTE(review): descriptor Type is MetricCumulative; confirm MetricGauge is intended + IntValue: int64(value), + }, + }) + } + return result + }, +} + +var MetricDiskIOWrite = Metric{ + MetricDescriptor: MetricDescriptor{ + Name: "disk/io_write_bytes", + Description: "Cumulative number of bytes write over disk", + Type: MetricCumulative, + ValueType: ValueInt64, + Units: UnitsBytes, + Labels: metricLabels, + }, + HasLabeledMetric: func(spec *cadvisor.ContainerSpec) bool { + return spec.HasDiskIo + }, + GetLabeledMetric: func(spec *cadvisor.ContainerSpec, stat *cadvisor.ContainerStats) []LabeledMetric { + result := make([]LabeledMetric, 0, len(stat.DiskIo.IoServiceBytes)) + for _, ioServiceBytesPerPartition := range stat.DiskIo.IoServiceBytes { + 
resourceIDKey := fmt.Sprintf( + "%v:%v", + ioServiceBytesPerPartition.Major, + ioServiceBytesPerPartition.Minor, + ) + var value uint64 + if v, exists := ioServiceBytesPerPartition.Stats["Write"]; exists { + value = v + } + + result = append(result, LabeledMetric{ + Name: "disk/io_write_bytes", + Labels: map[string]string{ + LabelResourceID.Key: resourceIDKey, + }, + MetricValue: MetricValue{ + ValueType: ValueInt64, + MetricType: MetricGauge, // NOTE(review): descriptor Type is MetricCumulative; confirm MetricGauge is intended + IntValue: int64(value), + }, + }) + } + return result + }, +} + +var MetricDiskIOReadRate = Metric{ + MetricDescriptor: MetricDescriptor{ + Name: "disk/io_read_bytes_rate", + Description: "Rate of bytes read over disk in bytes per second", + Type: MetricGauge, + ValueType: ValueFloat, + Units: UnitsCount, + Labels: metricLabels, + }, +} + +var MetricDiskIOWriteRate = Metric{ + MetricDescriptor: MetricDescriptor{ + Name: "disk/io_write_bytes_rate", + Description: "Rate of bytes written over disk in bytes per second", + Type: MetricGauge, + ValueType: ValueFloat, + Units: UnitsCount, + Labels: metricLabels, + }, +} + func IsNodeAutoscalingMetric(name string) bool { for _, autoscalingMetric := range NodeAutoscalingMetrics { if autoscalingMetric.MetricDescriptor.Name == name { diff --git a/metrics/processors/rate_calculator.go b/metrics/processors/rate_calculator.go index 5682f12cd0..577ae34620 100644 --- a/metrics/processors/rate_calculator.go +++ b/metrics/processors/rate_calculator.go @@ -15,6 +15,8 @@ package processors import ( + "reflect" + "k8s.io/heapster/metrics/core" "github.com/golang/glog" @@ -53,29 +55,68 @@ func (this *RateCalculator) Process(batch *core.DataBatch) (*core.DataBatch, err continue } + var metricValNew, metricValOld core.MetricValue + var foundNew, foundOld bool + for metricName, targetMetric := range this.rateMetricsMapping { - metricValNew, foundNew := newMs.MetricValues[metricName] - metricValOld, foundOld := oldMs.MetricValues[metricName] - if foundNew && foundOld { - if metricName == 
core.MetricCpuUsage.MetricDescriptor.Name { - // cpu/usage values are in nanoseconds; we want to have it in millicores (that's why constant 1000 is here). - newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) / - (newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano()) - - newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{ - ValueType: core.ValueInt64, - MetricType: core.MetricGauge, - IntValue: newVal, + + if metricName == core.MetricDiskIORead.MetricDescriptor.Name || metricName == core.MetricDiskIOWrite.MetricDescriptor.Name { + for _, itemNew := range newMs.LabeledMetrics { + foundNew, foundOld = false, false + + if itemNew.Name == metricName { + metricValNew, foundNew = itemNew.MetricValue, true + + for _, itemOld := range oldMs.LabeledMetrics { + if itemOld.Name == metricName && reflect.DeepEqual(itemOld.Labels, itemNew.Labels) { + metricValOld, foundOld = itemOld.MetricValue, true + break + } + } } - } else if targetMetric.MetricDescriptor.ValueType == core.ValueFloat { - newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) / - float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano()) + if foundNew && foundOld { + if targetMetric.MetricDescriptor.ValueType == core.ValueFloat { + newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) / + float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano()) + + newMs.LabeledMetrics = append(newMs.LabeledMetrics, core.LabeledMetric{ + Name: targetMetric.MetricDescriptor.Name, + Labels: itemNew.Labels, + MetricValue: core.MetricValue{ + ValueType: core.ValueFloat, + MetricType: core.MetricGauge, + FloatValue: newVal, + }, + }) + } + } + } + } else { + metricValNew, foundNew = newMs.MetricValues[metricName] + metricValOld, foundOld = oldMs.MetricValues[metricName] + + if foundNew && foundOld { + if metricName == core.MetricCpuUsage.MetricDescriptor.Name { + // cpu/usage values are in nanoseconds; we want to have it in millicores (that's why 
constant 1000 is here). + newVal := 1000 * (metricValNew.IntValue - metricValOld.IntValue) / + (newMs.ScrapeTime.UnixNano() - oldMs.ScrapeTime.UnixNano()) + + newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{ + ValueType: core.ValueInt64, + MetricType: core.MetricGauge, + IntValue: newVal, + } + + } else if targetMetric.MetricDescriptor.ValueType == core.ValueFloat { + newVal := 1e9 * float32(metricValNew.IntValue-metricValOld.IntValue) / + float32(newMs.ScrapeTime.UnixNano()-oldMs.ScrapeTime.UnixNano()) - newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{ - ValueType: core.ValueFloat, - MetricType: core.MetricGauge, - FloatValue: newVal, + newMs.MetricValues[targetMetric.MetricDescriptor.Name] = core.MetricValue{ + ValueType: core.ValueFloat, + MetricType: core.MetricGauge, + FloatValue: newVal, + } } } }