From 5a280a5cfde07fceeaf83df2266fd6e3cca35412 Mon Sep 17 00:00:00 2001 From: Xuewei Zhang Date: Thu, 15 Aug 2019 17:14:54 -0700 Subject: [PATCH] Add Stackdriver exporter --- Makefile | 2 +- .../exporterplugins/default_plugin.go | 20 +++ .../stackdriver_exporter_plugin.go | 25 +++ .../node_problem_detector.go | 16 +- cmd/options/options.go | 21 ++- cmd/options/options_test.go | 54 ++++-- config/stackdriver-exporter.json | 6 + .../node-problem-detector-stackdriver.service | 16 ++ pkg/exporters/register.go | 73 ++++++++ pkg/exporters/register_test.go | 72 ++++++++ pkg/exporters/stackdriver/config/config.go | 62 +++++++ .../stackdriver/config/config_test.go | 164 ++++++++++++++++++ pkg/exporters/stackdriver/metadata/type.go | 65 +++++++ .../stackdriver/stackdriver_exporter.go | 146 ++++++++++++++++ .../stackdriver/stackdriver_exporter_test.go | 33 ++++ pkg/problemmetrics/problem_metrics.go | 6 +- pkg/systemstatsmonitor/disk_collector.go | 9 +- pkg/systemstatsmonitor/host_collector.go | 3 +- pkg/types/types.go | 16 ++ pkg/util/metrics/helpers.go | 149 ---------------- pkg/util/metrics/metric_float64.go | 100 +++++++++++ pkg/util/metrics/metric_id.go | 70 ++++++++ pkg/util/metrics/metric_int64.go | 100 +++++++++++ test/e2e-install.sh | 4 +- 24 files changed, 1056 insertions(+), 176 deletions(-) create mode 100644 cmd/nodeproblemdetector/exporterplugins/default_plugin.go create mode 100644 cmd/nodeproblemdetector/exporterplugins/stackdriver_exporter_plugin.go create mode 100644 config/stackdriver-exporter.json create mode 100644 config/systemd/node-problem-detector-stackdriver.service create mode 100644 pkg/exporters/register.go create mode 100644 pkg/exporters/register_test.go create mode 100644 pkg/exporters/stackdriver/config/config.go create mode 100644 pkg/exporters/stackdriver/config/config_test.go create mode 100644 pkg/exporters/stackdriver/metadata/type.go create mode 100644 pkg/exporters/stackdriver/stackdriver_exporter.go create mode 100644 pkg/exporters/stackdriver/stackdriver_exporter_test.go create mode 100644 pkg/util/metrics/metric_float64.go create mode 100644 pkg/util/metrics/metric_id.go create mode 100644 pkg/util/metrics/metric_int64.go diff --git a/Makefile b/Makefile index e5ff59a8d..9cdf42c6a 100644 --- a/Makefile +++ b/Makefile @@ -41,7 +41,7 @@ PKG:=k8s.io/node-problem-detector PKG_SOURCES:=$(shell find pkg cmd -name '*.go') # TARBALL is the name of release tar. Include binary version by default. -TARBALL:=node-problem-detector-$(VERSION).tar.gz +TARBALL?=node-problem-detector-$(VERSION).tar.gz # IMAGE is the image name of the node problem detector container image. IMAGE:=$(REGISTRY)/node-problem-detector:$(TAG) diff --git a/cmd/nodeproblemdetector/exporterplugins/default_plugin.go b/cmd/nodeproblemdetector/exporterplugins/default_plugin.go new file mode 100644 index 000000000..2e348b4e8 --- /dev/null +++ b/cmd/nodeproblemdetector/exporterplugins/default_plugin.go @@ -0,0 +1,20 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exporterplugins + +// This file is necessary to make sure the exporterplugins package non-empty +// under any build tags. diff --git a/cmd/nodeproblemdetector/exporterplugins/stackdriver_exporter_plugin.go b/cmd/nodeproblemdetector/exporterplugins/stackdriver_exporter_plugin.go new file mode 100644 index 000000000..a580549c3 --- /dev/null +++ b/cmd/nodeproblemdetector/exporterplugins/stackdriver_exporter_plugin.go @@ -0,0 +1,25 @@ +// +build !disable_stackdriver_exporter + +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exporterplugins + +import ( + _ "k8s.io/node-problem-detector/pkg/exporters/stackdriver" +) + +// The stackdriver plugin takes about 6MB in the NPD binary. diff --git a/cmd/nodeproblemdetector/node_problem_detector.go b/cmd/nodeproblemdetector/node_problem_detector.go index 071853861..b551373d4 100644 --- a/cmd/nodeproblemdetector/node_problem_detector.go +++ b/cmd/nodeproblemdetector/node_problem_detector.go @@ -22,8 +22,10 @@ import ( "github.com/golang/glog" "github.com/spf13/pflag" + _ "k8s.io/node-problem-detector/cmd/nodeproblemdetector/exporterplugins" _ "k8s.io/node-problem-detector/cmd/nodeproblemdetector/problemdaemonplugins" "k8s.io/node-problem-detector/cmd/options" + "k8s.io/node-problem-detector/pkg/exporters" "k8s.io/node-problem-detector/pkg/exporters/k8sexporter" "k8s.io/node-problem-detector/pkg/exporters/prometheusexporter" "k8s.io/node-problem-detector/pkg/problemdaemon" @@ -54,21 +56,25 @@ func main() { } // Initialize exporters. - exporters := []types.Exporter{} + initializedExporters := []types.Exporter{} if ke := k8sexporter.NewExporterOrDie(npdo); ke != nil { - exporters = append(exporters, ke) + initializedExporters = append(initializedExporters, ke) glog.Info("K8s exporter started.") } if pe := prometheusexporter.NewExporterOrDie(npdo); pe != nil { - exporters = append(exporters, pe) + initializedExporters = append(initializedExporters, pe) glog.Info("Prometheus exporter started.") } - if len(exporters) == 0 { + plugableExporters := exporters.NewExporters(npdo.ExporterConfigPaths) + if len(plugableExporters) != 0 { + initializedExporters = append(initializedExporters, plugableExporters...) + } + if len(initializedExporters) == 0 { glog.Fatalf("No exporter is successfully setup") } // Initialize NPD core. - p := problemdetector.NewProblemDetector(problemDaemons, exporters) + p := problemdetector.NewProblemDetector(problemDaemons, initializedExporters) if err := p.Run(); err != nil { glog.Fatalf("Problem detector failed with error: %v", err) } diff --git a/cmd/options/options.go b/cmd/options/options.go index ab6ccdbcf..abcd2d198 100644 --- a/cmd/options/options.go +++ b/cmd/options/options.go @@ -26,6 +26,7 @@ import ( "github.com/spf13/pflag" + "k8s.io/node-problem-detector/pkg/exporters" "k8s.io/node-problem-detector/pkg/problemdaemon" "k8s.io/node-problem-detector/pkg/types" ) @@ -63,6 +64,9 @@ type NodeProblemDetectorOptions struct { // PrometheusServerAddress is the address to bind the Prometheus scrape endpoint. PrometheusServerAddress string + // ExporterConfigPaths specifies the list of paths to configuration files for each exporter. + ExporterConfigPaths types.ExporterConfigPathMap + // problem daemon options // SystemLogMonitorConfigPaths specifies the list of paths to system log monitor configuration @@ -85,7 +89,13 @@ type NodeProblemDetectorOptions struct { } func NewNodeProblemDetectorOptions() *NodeProblemDetectorOptions { - npdo := &NodeProblemDetectorOptions{MonitorConfigPaths: types.ProblemDaemonConfigPathMap{}} + npdo := &NodeProblemDetectorOptions{ + ExporterConfigPaths: types.ExporterConfigPathMap{}, + MonitorConfigPaths: types.ProblemDaemonConfigPathMap{}} + + for _, exporterName := range exporters.GetExporterNames() { + npdo.ExporterConfigPaths[exporterName] = &[]string{} + } for _, problemDaemonName := range problemdaemon.GetProblemDaemonNames() { npdo.MonitorConfigPaths[problemDaemonName] = &[]string{} } @@ -118,6 +128,15 @@ func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&npdo.PrometheusServerAddress, "prometheus-address", "127.0.0.1", "The address to bind the Prometheus scrape endpoint.") + for _, exporterName := range exporters.GetExporterNames() { + fs.StringSliceVar( + npdo.ExporterConfigPaths[exporterName], + "exporter."+string(exporterName), + []string{}, + fmt.Sprintf("Comma separated configurations for %v exporter. %v", + exporterName, + exporters.GetExporterHandlerOrDie(exporterName).CmdOptionDescription)) + } for _, problemDaemonName := range problemdaemon.GetProblemDaemonNames() { fs.StringSliceVar( npdo.MonitorConfigPaths[problemDaemonName], diff --git a/cmd/options/options_test.go b/cmd/options/options_test.go index 05902db7e..511ddd080 100644 --- a/cmd/options/options_test.go +++ b/cmd/options/options_test.go @@ -120,10 +120,13 @@ func TestSetNodeNameOrDie(t *testing.T) { } func TestValidOrDie(t *testing.T) { + emptyMonitorConfigMap := types.ProblemDaemonConfigPathMap{} fooMonitorConfigMap := types.ProblemDaemonConfigPathMap{} fooMonitorConfigMap["foo-monitor"] = &[]string{"config-a", "config-b"} - emptyMonitorConfigMap := types.ProblemDaemonConfigPathMap{} + emptyExporterConfigMap := types.ExporterConfigPathMap{} + barExporterConfigMap := types.ExporterConfigPathMap{} + barExporterConfigMap["bar-exporter"] = &[]string{"config-c", "config-d"} testCases := []struct { name string @@ -133,48 +136,54 @@ func TestValidOrDie(t *testing.T) { { name: "default k8s exporter config", npdo: NodeProblemDetectorOptions{ - MonitorConfigPaths: fooMonitorConfigMap, + MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: false, }, { name: "enables k8s exporter config", npdo: NodeProblemDetectorOptions{ - ApiServerOverride: "", - EnableK8sExporter: true, - MonitorConfigPaths: fooMonitorConfigMap, + ApiServerOverride: "", + EnableK8sExporter: true, + MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: false, }, { name: "k8s exporter config with valid ApiServerOverride", npdo: NodeProblemDetectorOptions{ - ApiServerOverride: "127.0.0.1", - EnableK8sExporter: true, - MonitorConfigPaths: fooMonitorConfigMap, + ApiServerOverride: "127.0.0.1", + EnableK8sExporter: true, + MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: false, }, { name: "k8s exporter config with invalid ApiServerOverride", npdo: NodeProblemDetectorOptions{ - ApiServerOverride: ":foo", - EnableK8sExporter: true, - MonitorConfigPaths: fooMonitorConfigMap, + ApiServerOverride: ":foo", + EnableK8sExporter: true, + MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, { name: "non-empty MonitorConfigPaths", npdo: NodeProblemDetectorOptions{ - MonitorConfigPaths: fooMonitorConfigMap, + MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: false, }, { name: "empty MonitorConfigPaths", npdo: NodeProblemDetectorOptions{ - MonitorConfigPaths: emptyMonitorConfigMap, + MonitorConfigPaths: emptyMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, @@ -188,6 +197,7 @@ func TestValidOrDie(t *testing.T) { npdo: NodeProblemDetectorOptions{ SystemLogMonitorConfigPaths: []string{"config-a"}, MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, @@ -196,6 +206,7 @@ func TestValidOrDie(t *testing.T) { npdo: NodeProblemDetectorOptions{ CustomPluginMonitorConfigPaths: []string{"config-a"}, MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, @@ -204,6 +215,7 @@ func TestValidOrDie(t *testing.T) { npdo: NodeProblemDetectorOptions{ SystemLogMonitorConfigPaths: []string{"config-a"}, MonitorConfigPaths: emptyMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, @@ -211,6 +223,7 @@ func TestValidOrDie(t *testing.T) { name: "deprecated SystemLogMonitor option with un-initialized MonitorConfigPaths", npdo: NodeProblemDetectorOptions{ SystemLogMonitorConfigPaths: []string{"config-a"}, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, @@ -219,6 +232,7 @@ func TestValidOrDie(t *testing.T) { npdo: NodeProblemDetectorOptions{ CustomPluginMonitorConfigPaths: []string{"config-b"}, MonitorConfigPaths: emptyMonitorConfigMap, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, @@ -226,9 +240,23 @@ func TestValidOrDie(t *testing.T) { name: "deprecated CustomPluginMonitor option with un-initialized MonitorConfigPaths", npdo: NodeProblemDetectorOptions{ CustomPluginMonitorConfigPaths: []string{"config-b"}, + ExporterConfigPaths: barExporterConfigMap, }, expectPanic: true, }, + { + name: "empty ExporterConfigPaths", + npdo: NodeProblemDetectorOptions{ + MonitorConfigPaths: fooMonitorConfigMap, + ExporterConfigPaths: emptyExporterConfigMap, + }, + expectPanic: false, + }, + { + name: "un-initialized ExporterConfigPaths", + npdo: NodeProblemDetectorOptions{}, + expectPanic: true, + }, } for _, test := range testCases { diff --git a/config/stackdriver-exporter.json b/config/stackdriver-exporter.json new file mode 100644 index 000000000..f26f64ad8 --- /dev/null +++ b/config/stackdriver-exporter.json @@ -0,0 +1,6 @@ +// These metrics can only be used in Stackdriver Monitoring staging environment for now. +// TODO(xueweiz): point to Prod API endpoint when the the metrics are rolled out. +{ + "apiEndpoint": "staging-monitoring.sandbox.googleapis.com:443", + "exportPeriod": "60s" +} diff --git a/config/systemd/node-problem-detector-stackdriver.service b/config/systemd/node-problem-detector-stackdriver.service new file mode 100644 index 000000000..08765394b --- /dev/null +++ b/config/systemd/node-problem-detector-stackdriver.service @@ -0,0 +1,16 @@ +[Unit] +Description=Node problem detector +Wants=network-online.target +After=network-online.target + +[Service] +Restart=always +RestartSec=10 +ExecStart=/home/kubernetes/bin/node-problem-detector --v=2 --logtostderr --enable-k8s-exporter=false \ + --exporter.stackdriver=/home/kubernetes/node-problem-detector/config/stackdriver-exporter.json \ + --config.system-log-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor.json,/home/kubernetes/node-problem-detector/config/docker-monitor.json,/home/kubernetes/node-problem-detector/config/systemd-monitor.json \ + --config.custom-plugin-monitor=/home/kubernetes/node-problem-detector/config/kernel-monitor-counter.json,/home/kubernetes/node-problem-detector/config/systemd-monitor-counter.json \ + --config.system-stats-monitor=/home/kubernetes/node-problem-detector/config/system-stats-monitor.json + +[Install] +WantedBy=multi-user.target diff --git a/pkg/exporters/register.go b/pkg/exporters/register.go new file mode 100644 index 000000000..52c634300 --- /dev/null +++ b/pkg/exporters/register.go @@ -0,0 +1,73 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exporters + +import ( + "fmt" + + "github.com/golang/glog" + + "k8s.io/node-problem-detector/pkg/types" +) + +var ( + handlers = make(map[types.ExporterType]types.ExporterHandler) +) + +// Register registers a exporter factory method, which will be used to create the exporter. +func Register(exporterType types.ExporterType, handler types.ExporterHandler) { + handlers[exporterType] = handler +} + +// GetExporterNames retrieves all available exporter types. +func GetExporterNames() []types.ExporterType { + exporterTypes := []types.ExporterType{} + for exporterType := range handlers { + exporterTypes = append(exporterTypes, exporterType) + } + return exporterTypes +} + +// GetExporterHandlerOrDie retrieves the ExporterHandler for a specific type of exporter, panic if error occurs.. +func GetExporterHandlerOrDie(exporterType types.ExporterType) types.ExporterHandler { + handler, ok := handlers[exporterType] + if !ok { + panic(fmt.Sprintf("Exporter handler for %v does not exist", exporterType)) + } + return handler +} + +// NewExporters creates all exporters based on the configurations provided. +func NewExporters(exporterConfigPaths types.ExporterConfigPathMap) []types.Exporter { + exporterMap := make(map[string]types.Exporter) + for exporterType, configs := range exporterConfigPaths { + for _, config := range *configs { + if _, ok := exporterMap[config]; ok { + // Skip the config if it's duplicated. + glog.Warningf("Duplicated exporter configuration %q", config) + continue + } + exporterMap[config] = handlers[exporterType].CreateExporterOrDie(config) + } + } + + exporters := []types.Exporter{} + for _, exporter := range exporterMap { + exporters = append(exporters, exporter) + } + return exporters +} diff --git a/pkg/exporters/register_test.go b/pkg/exporters/register_test.go new file mode 100644 index 000000000..b4b69095e --- /dev/null +++ b/pkg/exporters/register_test.go @@ -0,0 +1,72 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package exporters + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "k8s.io/node-problem-detector/pkg/types" +) + +func TestRegistration(t *testing.T) { + fooExporterFactory := func(configPath string) types.Exporter { + return nil + } + fooExporterHandler := types.ExporterHandler{ + CreateExporterOrDie: fooExporterFactory, + CmdOptionDescription: "foo option", + } + + barExporterFactory := func(configPath string) types.Exporter { + return nil + } + barExporterHandler := types.ExporterHandler{ + CreateExporterOrDie: barExporterFactory, + CmdOptionDescription: "bar option", + } + + Register("foo", fooExporterHandler) + Register("bar", barExporterHandler) + + expectedExporterNames := []types.ExporterType{"foo", "bar"} + exporterNames := GetExporterNames() + + assert.ElementsMatch(t, expectedExporterNames, exporterNames) + assert.Equal(t, "foo option", GetExporterHandlerOrDie("foo").CmdOptionDescription) + assert.Equal(t, "bar option", GetExporterHandlerOrDie("bar").CmdOptionDescription) + + handlers = make(map[types.ExporterType]types.ExporterHandler) +} + +func TestGetExporterHandlerOrDie(t *testing.T) { + fooExporterFactory := func(configPath string) types.Exporter { + return nil + } + fooExporterHandler := types.ExporterHandler{ + CreateExporterOrDie: fooExporterFactory, + CmdOptionDescription: "foo option", + } + + Register("foo", fooExporterHandler) + + assert.NotPanics(t, func() { GetExporterHandlerOrDie("foo") }) + assert.Panics(t, func() { GetExporterHandlerOrDie("bar") }) + + handlers = make(map[types.ExporterType]types.ExporterHandler) +} diff --git a/pkg/exporters/stackdriver/config/config.go b/pkg/exporters/stackdriver/config/config.go new file mode 100644 index 000000000..f9d28d966 --- /dev/null +++ b/pkg/exporters/stackdriver/config/config.go @@ -0,0 +1,62 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + "fmt" + "time" + + "k8s.io/node-problem-detector/pkg/exporters/stackdriver/metadata" +) + +var ( + defaultExportPeriodString = (60 * time.Second).String() + defaultEndpoint = "monitoring.googleapis.com:443" +) + +type StackdriverExporterConfig struct { + ExportPeriodString string `json:"exportPeriod"` + ExportPeriod time.Duration `json:"-"` + APIEndpoint string `json:"apiEndpoint"` + Metadata metadata.Metadata `json:"metadata"` +} + +// ApplyConfiguration applies default configurations. +func (sec *StackdriverExporterConfig) ApplyConfiguration() error { + if sec.ExportPeriodString == "" { + sec.ExportPeriodString = defaultExportPeriodString + } + if sec.APIEndpoint == "" { + sec.APIEndpoint = defaultEndpoint + } + + var err error + sec.ExportPeriod, err = time.ParseDuration(sec.ExportPeriodString) + if err != nil { + return fmt.Errorf("error in parsing ExportPeriodString %q: %v", sec.ExportPeriodString, err) + } + + return nil +} + +// Validate verifies whether the settings are valid. +func (sec *StackdriverExporterConfig) Validate() error { + if sec.ExportPeriod <= time.Duration(0) { + return fmt.Errorf("ExportPeriod %v must be above 0s", sec.ExportPeriod) + } + return nil +} diff --git a/pkg/exporters/stackdriver/config/config_test.go b/pkg/exporters/stackdriver/config/config_test.go new file mode 100644 index 000000000..44a843acf --- /dev/null +++ b/pkg/exporters/stackdriver/config/config_test.go @@ -0,0 +1,164 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package config + +import ( + "reflect" + "testing" + "time" + + "k8s.io/node-problem-detector/pkg/exporters/stackdriver/metadata" +) + +func TestApplyConfiguration(t *testing.T) { + testCases := []struct { + name string + orignalConfig StackdriverExporterConfig + wantedConfig StackdriverExporterConfig + isError bool + }{ + { + name: "normal", + orignalConfig: StackdriverExporterConfig{ + ExportPeriodString: "60s", + APIEndpoint: "monitoring.googleapis.com:443", + Metadata: metadata.Metadata{ + ProjectID: "some-gcp-project", + Zone: "us-central1-a", + InstanceID: "56781234", + InstanceName: "some-gce-instance", + }, + }, + isError: false, + wantedConfig: StackdriverExporterConfig{ + ExportPeriodString: "60s", + ExportPeriod: 60 * time.Second, + APIEndpoint: "monitoring.googleapis.com:443", + Metadata: metadata.Metadata{ + ProjectID: "some-gcp-project", + Zone: "us-central1-a", + InstanceID: "56781234", + InstanceName: "some-gce-instance", + }, + }, + }, + { + name: "staging API endpoint", + orignalConfig: StackdriverExporterConfig{ + ExportPeriodString: "60s", + APIEndpoint: "staging-monitoring.sandbox.googleapis.com:443", + Metadata: metadata.Metadata{ + ProjectID: "some-gcp-project", + Zone: "us-central1-a", + InstanceID: "56781234", + InstanceName: "some-gce-instance", + }, + }, + isError: false, + wantedConfig: StackdriverExporterConfig{ + ExportPeriodString: "60s", + ExportPeriod: 60 * time.Second, + APIEndpoint: "staging-monitoring.sandbox.googleapis.com:443", + Metadata: metadata.Metadata{ + ProjectID: "some-gcp-project", + Zone: "us-central1-a", + InstanceID: "56781234", + InstanceName: "some-gce-instance", + }, + }, + }, + { + name: "empty", + orignalConfig: StackdriverExporterConfig{}, + isError: false, + wantedConfig: StackdriverExporterConfig{ + ExportPeriodString: "1m0s", + ExportPeriod: 60 * time.Second, + APIEndpoint: "monitoring.googleapis.com:443", + }, + }, + { + name: "error", + orignalConfig: StackdriverExporterConfig{ + ExportPeriodString: "foo", + }, + isError: true, + wantedConfig: StackdriverExporterConfig{}, + }, + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + err := test.orignalConfig.ApplyConfiguration() + if err == nil && test.isError { + t.Errorf("Wanted an error got nil") + } + if err != nil && !test.isError { + t.Errorf("Wanted nil got an error") + } + if !test.isError && !reflect.DeepEqual(test.orignalConfig, test.wantedConfig) { + t.Errorf("Wanted: %+v. \nGot: %+v", test.wantedConfig, test.orignalConfig) + } + }) + } +} + +func TestValidate(t *testing.T) { + testCases := []struct { + name string + config StackdriverExporterConfig + isError bool + }{ + { + name: "normal", + config: StackdriverExporterConfig{ + ExportPeriodString: "60s", + APIEndpoint: "staging-monitoring.sandbox.googleapis.com:443", + Metadata: metadata.Metadata{ + ProjectID: "some-gcp-project", + Zone: "us-central1-a", + InstanceID: "56781234", + InstanceName: "some-gce-instance", + }, + }, + isError: false, + }, + { + name: "negativeExportPeriod", + config: StackdriverExporterConfig{ + ExportPeriodString: "-5s", + }, + isError: true, + }, + } + + for _, test := range testCases { + t.Run(test.name, func(t *testing.T) { + if err := test.config.ApplyConfiguration(); err != nil { + t.Errorf("Wanted no error with config %+v, got %v", test.config, err) + } + + err := test.config.Validate() + if test.isError && err == nil { + t.Errorf("Wanted an error with config %+v, got nil", test.config) + } + if !test.isError && err != nil { + t.Errorf("Wanted nil with config %+v got an error", test.config) + } + }) + } +} diff --git a/pkg/exporters/stackdriver/metadata/type.go b/pkg/exporters/stackdriver/metadata/type.go new file mode 100644 index 000000000..29b6bac25 --- /dev/null +++ b/pkg/exporters/stackdriver/metadata/type.go @@ -0,0 +1,65 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metadata + +import ( + "cloud.google.com/go/compute/metadata" +) + +type Metadata struct { + ProjectID string `json:"projectID"` + Zone string `json:"zone"` + InstanceID string `json:"instanceID"` + InstanceName string `json:"instanceName"` +} + +func (md *Metadata) IsValid() bool { + if md.ProjectID == "" || md.Zone == "" || md.InstanceID == "" || md.InstanceName == "" { + return false + } + return true +} + +func (md *Metadata) PopulateFromGCE() error { + var err error + if md.ProjectID == "" { + md.ProjectID, err = metadata.ProjectID() + if err != nil { + return err + } + } + if md.Zone == "" { + md.Zone, err = metadata.Zone() + if err != nil { + return err + } + } + if md.InstanceID == "" { + md.InstanceID, err = metadata.InstanceID() + if err != nil { + return err + } + } + if md.InstanceName == "" { + md.InstanceName, err = metadata.InstanceName() + if err != nil { + return err + } + } + + return nil +} diff --git a/pkg/exporters/stackdriver/stackdriver_exporter.go b/pkg/exporters/stackdriver/stackdriver_exporter.go new file mode 100644 index 000000000..100e3c9db --- /dev/null +++ b/pkg/exporters/stackdriver/stackdriver_exporter.go @@ -0,0 +1,146 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package stackdriverexporter + +import ( + "encoding/json" + "io/ioutil" + + "contrib.go.opencensus.io/exporter/stackdriver" + monitoredres "contrib.go.opencensus.io/exporter/stackdriver/monitoredresource" + "github.com/golang/glog" + "go.opencensus.io/stats/view" + "google.golang.org/api/option" + + "k8s.io/node-problem-detector/pkg/exporters" + seconfig "k8s.io/node-problem-detector/pkg/exporters/stackdriver/config" + "k8s.io/node-problem-detector/pkg/types" + "k8s.io/node-problem-detector/pkg/util/metrics" +) + +const exporterName = "stackdriver" + +var NPDMetricToSDMetric = map[metrics.MetricID]string{ + metrics.HostUptimeID: "compute.googleapis.com/guest/system/uptime", + metrics.ProblemCounterID: "compute.googleapis.com/guest/system/problem_count", + metrics.DiskAvgQueueLenID: "compute.googleapis.com/guest/disk/queue_length", + metrics.DiskIOTimeID: "compute.googleapis.com/guest/disk/io_time", + metrics.DiskWeightedIOID: "compute.googleapis.com/guest/disk/weighted_io_time", +} + +func init() { + exporters.Register(exporterName, types.ExporterHandler{ + CreateExporterOrDie: NewExporterOrDie, + CmdOptionDescription: "Set to config file paths."}) +} + +type stackdriverExporter struct { + configPath string + config seconfig.StackdriverExporterConfig +} + +func getMetricType(view *view.View) string { + viewName := view.Measure.Name() + // When there is no pre-defined Stackdriver Metric Type, fallback to custom metrics. + // e.g. custom.googleapis.com/npd/host/uptime + fallbackMetricType := "custom.googleapis.com/npd/" + viewName + + metricID, ok := metrics.MetricMap.ViewNameToMetricID(viewName) + if !ok { + return fallbackMetricType + } + stackdriverMetricType, ok := NPDMetricToSDMetric[metricID] + if !ok { + return fallbackMetricType + } + return stackdriverMetricType +} + +func (se *stackdriverExporter) setupViewExporterOrDie() { + clientOption := option.WithEndpoint(se.config.APIEndpoint) + + var globalLabels stackdriver.Labels + globalLabels.Set("instance_name", se.config.Metadata.InstanceName, "The name of the VM instance") + + viewExporter, err := stackdriver.NewExporter(stackdriver.Options{ + ProjectID: se.config.Metadata.ProjectID, + MonitoringClientOptions: []option.ClientOption{clientOption}, + MonitoredResource: &monitoredres.GCEInstance{ + ProjectID: se.config.Metadata.ProjectID, + InstanceID: se.config.Metadata.InstanceID, + Zone: se.config.Metadata.Zone, + }, + GetMetricType: getMetricType, + DefaultMonitoringLabels: &globalLabels, + }) + if err != nil { + glog.Fatalf("Failed to create Stackdriver OpenCensus view exporter: %v", err) + } + + view.SetReportingPeriod(se.config.ExportPeriod) + view.RegisterExporter(viewExporter) +} + +func (se *stackdriverExporter) populateMetadata() { + if se.config.Metadata.IsValid() { + return + } + + err := se.config.Metadata.PopulateFromGCE() + if err != nil { + glog.Fatalf("Failed to populate GCE metadata: %v", err) + } +} + +// NewExporterOrDie creates an exporter to export metrics to Stackdriver, panics if error occurs. +func NewExporterOrDie(configPath string) types.Exporter { + se := stackdriverExporter{configPath: configPath} + + // Apply configurations. + f, err := ioutil.ReadFile(configPath) + if err != nil { + glog.Fatalf("Failed to read configuration file %q: %v", configPath, err) + } + err = json.Unmarshal(f, &se.config) + if err != nil { + glog.Fatalf("Failed to unmarshal configuration file %q: %v", configPath, err) + } + + err = se.config.ApplyConfiguration() + if err != nil { + glog.Fatalf("Failed to apply configuration for %q: %v", configPath, err) + } + + err = se.config.Validate() + if err != nil { + glog.Fatalf("Failed to validate %s configuration %+v: %v", se.configPath, se.config, err) + } + + glog.Infof("Starting Stackdriver exporter %s", configPath) + + se.populateMetadata() + glog.Infof("Using metadata: %v", se.config.Metadata) + se.setupViewExporterOrDie() + + return &se +} + +// ExportProblems does nothing. +// Stackdriver exporter only exports metrics. +func (se *stackdriverExporter) ExportProblems(status *types.Status) { + return +} diff --git a/pkg/exporters/stackdriver/stackdriver_exporter_test.go b/pkg/exporters/stackdriver/stackdriver_exporter_test.go new file mode 100644 index 000000000..f5f394815 --- /dev/null +++ b/pkg/exporters/stackdriver/stackdriver_exporter_test.go @@ -0,0 +1,33 @@ +// +build !disable_stackdriver_exporter + +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package stackdriverexporter + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "k8s.io/node-problem-detector/pkg/exporters" +) + +func TestRegistration(t *testing.T) { + assert.NotPanics(t, + func() { exporters.GetExporterHandlerOrDie(exporterName) }, + "Stackdriver exporter failed to register itself as an exporter.") +} diff --git a/pkg/problemmetrics/problem_metrics.go b/pkg/problemmetrics/problem_metrics.go index 2d82ab047..e6241e6df 100644 --- a/pkg/problemmetrics/problem_metrics.go +++ b/pkg/problemmetrics/problem_metrics.go @@ -49,7 +49,8 @@ func NewProblemMetricsManagerOrDie() *ProblemMetricsManager { var err error pmm.problemCounter, err = metrics.NewInt64Metric( - "problem_counter", + metrics.ProblemCounterID, + string(metrics.ProblemCounterID), "Number of times a specific type of problem have occurred.", "1", metrics.Sum, @@ -59,7 +60,8 @@ func NewProblemMetricsManagerOrDie() *ProblemMetricsManager { } pmm.problemGauge, err = metrics.NewInt64Metric( - "problem_gauge", + metrics.ProblemGaugeID, + string(metrics.ProblemGaugeID), "Whether a specific type of problem is affecting the node or not.", "1", metrics.LastValue, diff --git a/pkg/systemstatsmonitor/disk_collector.go b/pkg/systemstatsmonitor/disk_collector.go index b83281113..6bb07b60d 100644 --- a/pkg/systemstatsmonitor/disk_collector.go +++ b/pkg/systemstatsmonitor/disk_collector.go @@ -49,7 +49,8 @@ func NewDiskCollectorOrDie(diskConfig *ssmtypes.DiskStatsConfig) *diskCollector // Use metrics.Sum aggregation method to ensure the metric is a counter/cumulative metric. dc.mIOTime, err = metrics.NewInt64Metric( - diskConfig.MetricsConfigs["disk/io_time"].DisplayName, + metrics.DiskIOTimeID, + diskConfig.MetricsConfigs[string(metrics.DiskIOTimeID)].DisplayName, "The IO time spent on the disk", "second", metrics.Sum, @@ -60,7 +61,8 @@ func NewDiskCollectorOrDie(diskConfig *ssmtypes.DiskStatsConfig) *diskCollector // Use metrics.Sum aggregation method to ensure the metric is a counter/cumulative metric. dc.mWeightedIO, err = metrics.NewInt64Metric( - diskConfig.MetricsConfigs["disk/weighted_io"].DisplayName, + metrics.DiskWeightedIOID, + diskConfig.MetricsConfigs[string(metrics.DiskWeightedIOID)].DisplayName, "The weighted IO on the disk", "second", metrics.Sum, @@ -70,7 +72,8 @@ func NewDiskCollectorOrDie(diskConfig *ssmtypes.DiskStatsConfig) *diskCollector } dc.mAvgQueueLen, err = metrics.NewFloat64Metric( - diskConfig.MetricsConfigs["disk/avg_queue_len"].DisplayName, + metrics.DiskAvgQueueLenID, + diskConfig.MetricsConfigs[string(metrics.DiskAvgQueueLenID)].DisplayName, "The average queue length on the disk", "second", metrics.LastValue, diff --git a/pkg/systemstatsmonitor/host_collector.go b/pkg/systemstatsmonitor/host_collector.go index 5cdafe048..fe1bccd0e 100644 --- a/pkg/systemstatsmonitor/host_collector.go +++ b/pkg/systemstatsmonitor/host_collector.go @@ -49,7 +49,8 @@ func NewHostCollectorOrDie(hostConfig *ssmtypes.HostStatsConfig) *hostCollector // Use metrics.Sum aggregation method to ensure the metric is a counter/cumulative metric. if hostConfig.MetricsConfigs["host/uptime"].DisplayName != "" { hc.uptime, err = metrics.NewInt64Metric( - hostConfig.MetricsConfigs["host/uptime"].DisplayName, + metrics.HostUptimeID, + hostConfig.MetricsConfigs[string(metrics.HostUptimeID)].DisplayName, "The uptime of the operating system", "second", metrics.Sum, diff --git a/pkg/types/types.go b/pkg/types/types.go index b981b282c..82c9fcff3 100644 --- a/pkg/types/types.go +++ b/pkg/types/types.go @@ -131,3 +131,19 @@ type ProblemDaemonHandler struct { // CmdOptionDescription explains how to configure the problem daemon from command line arguments. CmdOptionDescription string } + +// ExporterType is the type of the exporter. +type ExporterType string + +// ExporterConfigPathMap represents configurations on all types of exporters: +// 1) Each key represents a type of exporter. +// 2) Each value represents the config file paths for the exporter. +type ExporterConfigPathMap map[ExporterType]*[]string + +// ExporterHandler represents the initialization handler for a type of exporter. +type ExporterHandler struct { + // CreateExporterOrDie initializes an exporter, panic if error occurs. + CreateExporterOrDie func(string) Exporter + // CmdOptionDescription explains how to configure the exporter from command line arguments. + CmdOptionDescription string +} diff --git a/pkg/util/metrics/helpers.go b/pkg/util/metrics/helpers.go index 3cc6952ff..5e1d79c51 100644 --- a/pkg/util/metrics/helpers.go +++ b/pkg/util/metrics/helpers.go @@ -16,15 +16,12 @@ limitations under the License. package metrics import ( - "context" "fmt" "strings" "sync" pcm "github.com/prometheus/client_model/go" "github.com/prometheus/common/expfmt" - "go.opencensus.io/stats" - "go.opencensus.io/stats/view" "go.opencensus.io/tag" ) @@ -47,152 +44,6 @@ const ( Sum Aggregation = "Sum" ) -// Int64MetricRepresentation represents a snapshot of an int64 metrics. -// This is used for inspecting metric internals. -type Int64MetricRepresentation struct { - // Name is the metric name. - Name string - // Labels contains all metric labels in key-value pair format. - Labels map[string]string - // Value is the value of the metric. - Value int64 -} - -// Int64Metric represents an int64 metric. -type Int64Metric struct { - name string - measure *stats.Int64Measure -} - -// NewInt64Metric create a Int64Metric metric, returns nil when name is empty. -func NewInt64Metric(name string, description string, unit string, aggregation Aggregation, tagNames []string) (*Int64Metric, error) { - if name == "" { - return nil, nil - } - - tagKeys, err := getTagKeysFromNames(tagNames) - if err != nil { - return nil, fmt.Errorf("failed to create metric %q because of tag creation failure: %v", name, err) - } - - var aggregationMethod *view.Aggregation - switch aggregation { - case LastValue: - aggregationMethod = view.LastValue() - case Sum: - aggregationMethod = view.Sum() - default: - return nil, fmt.Errorf("unknown aggregation option %q", aggregation) - } - - measure := stats.Int64(name, description, unit) - newView := &view.View{ - Name: name, - Measure: measure, - Description: description, - Aggregation: aggregationMethod, - TagKeys: tagKeys, - } - view.Register(newView) - - metric := Int64Metric{name, measure} - return &metric, nil -} - -// Record records a measurement for the metric, with provided tags as metric labels. -func (metric *Int64Metric) Record(tags map[string]string, measurement int64) error { - var mutators []tag.Mutator - - tagMapMutex.RLock() - defer tagMapMutex.RUnlock() - - for tagName, tagValue := range tags { - tagKey, ok := tagMap[tagName] - if !ok { - return fmt.Errorf("referencing none existing tag %q in metric %q", tagName, metric.name) - } - mutators = append(mutators, tag.Upsert(tagKey, tagValue)) - } - - return stats.RecordWithTags( - context.Background(), - mutators, - metric.measure.M(measurement)) -} - -// Float64MetricRepresentation represents a snapshot of a float64 metrics. -// This is used for inspecting metric internals. -type Float64MetricRepresentation struct { - // Name is the metric name. - Name string - // Labels contains all metric labels in key-value pair format. - Labels map[string]string - // Value is the value of the metric. - Value float64 -} - -// Float64Metric represents an float64 metric. -type Float64Metric struct { - name string - measure *stats.Float64Measure -} - -// NewFloat64Metric create a Float64Metric metrics, returns nil when name is empty. -func NewFloat64Metric(name string, description string, unit string, aggregation Aggregation, tagNames []string) (*Float64Metric, error) { - if name == "" { - return nil, nil - } - - tagKeys, err := getTagKeysFromNames(tagNames) - if err != nil { - return nil, fmt.Errorf("failed to create metric %q because of tag creation failure: %v", name, err) - } - - var aggregationMethod *view.Aggregation - switch aggregation { - case LastValue: - aggregationMethod = view.LastValue() - case Sum: - aggregationMethod = view.Sum() - default: - return nil, fmt.Errorf("unknown aggregation option %q", aggregation) - } - - measure := stats.Float64(name, description, unit) - newView := &view.View{ - Name: name, - Measure: measure, - Description: description, - Aggregation: aggregationMethod, - TagKeys: tagKeys, - } - view.Register(newView) - - metric := Float64Metric{name, measure} - return &metric, nil -} - -// Record records a measurement for the metric, with provided tags as metric labels. -func (metric *Float64Metric) Record(tags map[string]string, measurement float64) error { - var mutators []tag.Mutator - - tagMapMutex.RLock() - defer tagMapMutex.RUnlock() - - for tagName, tagValue := range tags { - tagKey, ok := tagMap[tagName] - if !ok { - return fmt.Errorf("referencing none existing tag %q in metric %q", tagName, metric.name) - } - mutators = append(mutators, tag.Upsert(tagKey, tagValue)) - } - - return stats.RecordWithTags( - context.Background(), - mutators, - metric.measure.M(measurement)) -} - func getTagKeysFromNames(tagNames []string) ([]tag.Key, error) { tagMapMutex.Lock() defer tagMapMutex.Unlock() diff --git a/pkg/util/metrics/metric_float64.go b/pkg/util/metrics/metric_float64.go new file mode 100644 index 000000000..fab1ec017 --- /dev/null +++ b/pkg/util/metrics/metric_float64.go @@ -0,0 +1,100 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package metrics + +import ( + "context" + "fmt" + + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" +) + +// Float64MetricRepresentation represents a snapshot of a float64 metrics. +// This is used for inspecting metric internals. +type Float64MetricRepresentation struct { + // Name is the metric name. + Name string + // Labels contains all metric labels in key-value pair format. + Labels map[string]string + // Value is the value of the metric. + Value float64 +} + +// Float64Metric represents an float64 metric. +type Float64Metric struct { + name string + measure *stats.Float64Measure +} + +// NewFloat64Metric create a Float64Metric metrics, returns nil when viewName is empty. +func NewFloat64Metric(metricID MetricID, viewName string, description string, unit string, aggregation Aggregation, tagNames []string) (*Float64Metric, error) { + if viewName == "" { + return nil, nil + } + + MetricMap.AddMapping(metricID, viewName) + + tagKeys, err := getTagKeysFromNames(tagNames) + if err != nil { + return nil, fmt.Errorf("failed to create metric %q because of tag creation failure: %v", viewName, err) + } + + var aggregationMethod *view.Aggregation + switch aggregation { + case LastValue: + aggregationMethod = view.LastValue() + case Sum: + aggregationMethod = view.Sum() + default: + return nil, fmt.Errorf("unknown aggregation option %q", aggregation) + } + + measure := stats.Float64(viewName, description, unit) + newView := &view.View{ + Name: viewName, + Measure: measure, + Description: description, + Aggregation: aggregationMethod, + TagKeys: tagKeys, + } + view.Register(newView) + + metric := Float64Metric{viewName, measure} + return &metric, nil +} + +// Record records a measurement for the metric, with provided tags as metric labels. +func (metric *Float64Metric) Record(tags map[string]string, measurement float64) error { + var mutators []tag.Mutator + + tagMapMutex.RLock() + defer tagMapMutex.RUnlock() + + for tagName, tagValue := range tags { + tagKey, ok := tagMap[tagName] + if !ok { + return fmt.Errorf("referencing none existing tag %q in metric %q", tagName, metric.name) + } + mutators = append(mutators, tag.Upsert(tagKey, tagValue)) + } + + return stats.RecordWithTags( + context.Background(), + mutators, + metric.measure.M(measurement)) +} diff --git a/pkg/util/metrics/metric_id.go b/pkg/util/metrics/metric_id.go new file mode 100644 index 000000000..ee54429b5 --- /dev/null +++ b/pkg/util/metrics/metric_id.go @@ -0,0 +1,70 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package metrics + +import ( + "sync" +) + +const ( + ProblemCounterID MetricID = "problem_counter" + ProblemGaugeID MetricID = "problem_gauge" + DiskIOTimeID MetricID = "disk/io_time" + DiskWeightedIOID MetricID = "disk/weighted_io" + DiskAvgQueueLenID MetricID = "disk/avg_queue_len" + HostUptimeID MetricID = "host/uptime" +) + +var MetricMap MetricMapping + +func init() { + MetricMap.mapMutex.Lock() + MetricMap.metricIDToViewNameMap = make(map[MetricID]string) + MetricMap.viewNameToMetricIDMap = make(map[string]MetricID) + MetricMap.mapMutex.Unlock() +} + +type MetricID string + +type MetricMapping struct { + metricIDToViewNameMap map[MetricID]string + viewNameToMetricIDMap map[string]MetricID + mapMutex sync.RWMutex +} + +func (mm *MetricMapping) AddMapping(metricID MetricID, viewName string) { + mm.mapMutex.Lock() + defer mm.mapMutex.Unlock() + + mm.metricIDToViewNameMap[metricID] = viewName + mm.viewNameToMetricIDMap[viewName] = metricID +} + +func (mm *MetricMapping) MetricIDToViewName(metricID MetricID) (string, bool) { + mm.mapMutex.RLock() + defer mm.mapMutex.RUnlock() + + viewName, ok := mm.metricIDToViewNameMap[metricID] + return viewName, ok +} + +func (mm *MetricMapping) ViewNameToMetricID(viewName string) (MetricID, bool) { + mm.mapMutex.RLock() + defer mm.mapMutex.RUnlock() + + id, ok := mm.viewNameToMetricIDMap[viewName] + return id, ok +} diff --git a/pkg/util/metrics/metric_int64.go b/pkg/util/metrics/metric_int64.go new file mode 100644 index 000000000..a01626e14 --- /dev/null +++ b/pkg/util/metrics/metric_int64.go @@ -0,0 +1,100 @@ +/* +Copyright 2019 The Kubernetes Authors All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +package metrics + +import ( + "context" + "fmt" + + "go.opencensus.io/stats" + "go.opencensus.io/stats/view" + "go.opencensus.io/tag" +) + +// Int64MetricRepresentation represents a snapshot of an int64 metrics. +// This is used for inspecting metric internals. +type Int64MetricRepresentation struct { + // Name is the metric name. + Name string + // Labels contains all metric labels in key-value pair format. + Labels map[string]string + // Value is the value of the metric. + Value int64 +} + +// Int64Metric represents an int64 metric. +type Int64Metric struct { + name string + measure *stats.Int64Measure +} + +// NewInt64Metric create a Int64Metric metric, returns nil when viewName is empty. +func NewInt64Metric(metricID MetricID, viewName string, description string, unit string, aggregation Aggregation, tagNames []string) (*Int64Metric, error) { + if viewName == "" { + return nil, nil + } + + MetricMap.AddMapping(metricID, viewName) + + tagKeys, err := getTagKeysFromNames(tagNames) + if err != nil { + return nil, fmt.Errorf("failed to create metric %q because of tag creation failure: %v", viewName, err) + } + + var aggregationMethod *view.Aggregation + switch aggregation { + case LastValue: + aggregationMethod = view.LastValue() + case Sum: + aggregationMethod = view.Sum() + default: + return nil, fmt.Errorf("unknown aggregation option %q", aggregation) + } + + measure := stats.Int64(viewName, description, unit) + newView := &view.View{ + Name: viewName, + Measure: measure, + Description: description, + Aggregation: aggregationMethod, + TagKeys: tagKeys, + } + view.Register(newView) + + metric := Int64Metric{viewName, measure} + return &metric, nil +} + +// Record records a measurement for the metric, with provided tags as metric labels. +func (metric *Int64Metric) Record(tags map[string]string, measurement int64) error { + var mutators []tag.Mutator + + tagMapMutex.RLock() + defer tagMapMutex.RUnlock() + + for tagName, tagValue := range tags { + tagKey, ok := tagMap[tagName] + if !ok { + return fmt.Errorf("referencing none existing tag %q in metric %q", tagName, metric.name) + } + mutators = append(mutators, tag.Upsert(tagKey, tagValue)) + } + + return stats.RecordWithTags( + context.Background(), + mutators, + metric.measure.M(measurement)) +} diff --git a/test/e2e-install.sh b/test/e2e-install.sh index ee5d29a4f..63016c726 100755 --- a/test/e2e-install.sh +++ b/test/e2e-install.sh @@ -54,6 +54,9 @@ function install-npd() { # Below remount is to work around COS's noexec mount on /home. mount -o remount,exec "${BIN_DIR}" + echo "Shutting down NPD service if it exist." + systemctl stop node-problem-detector.service || true + echo "Installing NPD binary." cp "${workdir}"/bin/node-problem-detector "${BIN_DIR}" @@ -72,7 +75,6 @@ function install-npd() { # Start systemd service. echo "Starting NPD systemd service." systemctl daemon-reload - systemctl stop node-problem-detector.service || true systemctl start node-problem-detector.service }